1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qu8-gemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m=4, n=16, k=16 and no stride overrides.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
40
// Single run with m=4, n=16, k=16 and cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .cn_stride(19)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
54
// Single run with m=4, n=16, k=16 and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .a_stride(19)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
68
// Sweeps n over [1, 16] and m over [1, 4] at k=16, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
86
// Sweeps m over [1, 4] at n=16, k=16, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(rows).n(16).k(16)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
102
// Sweeps n over [1, 16] at m=4, k=16, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(cols).k(16)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
118
// Sweeps k over [1, 15] at m=4, n=16.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
133
// Sweeps k over [1, 15] at m=4, n=16 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(19)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
149
// Sweeps k over [1, 15], n over [1, 16] and m over [1, 4], with iterations
// set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
169
// Sweeps k over [17, 31] at m=4, n=16.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
184
// Sweeps k over [17, 31] at m=4, n=16 with a_stride set to 37.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(37)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
200
// Sweeps k over [17, 31], n over [1, 16] and m over [1, 4], with iterations
// set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
220
// Sweeps k over 32, 48, ..., 160 (step 16) at m=4, n=16.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
235
// Sweeps k over 32, 48, ..., 160 (step 16) at m=4, n=16 with a_stride set
// to 163.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(163)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
251
// Sweeps k over 32, 48, ..., 160 (step 16), n over [1, 16] and m over [1, 4],
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
271
// Sweeps n over [17, 31] and k over 1, 18, ..., 69 (step 17) at m=4.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
288
// Sweeps n over [17, 31] and k over 1, 18, ..., 69 (step 17) at m=4 with
// cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
306
// Sweeps n over [17, 31] and k over 1, 18, ..., 69 (step 17) at m=4 with
// a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
324
// Sweeps n over [17, 31], k over 1, 18, ..., 69 (step 17) and m over [1, 4],
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
344
// Sweeps n over 32 and 48 (step 16) and k over 1, 18, ..., 69 (step 17) at m=4.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
361
// Sweeps n over 32 and 48 (step 16) and k over 1, 18, ..., 69 (step 17) at
// m=4 with cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
379
// Sweeps n over 32 and 48 (step 16) and k over 1, 18, ..., 69 (step 17) at
// m=4 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
397
// Sweeps n over 32 and 48 (step 16), k over 1, 18, ..., 69 (step 17) and m
// over [1, 4], with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
417
// Sweeps k over 1, 18, ..., 69 (step 17), n over [1, 16] and m over [1, 4]
// with cm_stride set to 19 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
438
// Single run with m=4, n=16, k=16 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
452
// Single run with m=4, n=16, k=16 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
466
// Single run with m=4, n=16, k=16 and cm_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .cm_stride(19)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
480
// Sweeps k over 1, 18, ..., 69 (step 17) at m=4, n=16 with a_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
496
// Sweeps k over 1, 18, ..., 69 (step 17) at m=4, n=16 with b_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
512
// Sweeps k over 1, 18, ..., 69 (step 17) at m=4, n=16 with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
529 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
530
531
532 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m=4, n=16, k=16 and no stride overrides.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
545
// Single run with m=4, n=16, k=16 and cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .cn_stride(19)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
559
// Single run with m=4, n=16, k=16 and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .a_stride(19)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
573
// Sweeps n over [1, 16] and m over [1, 4] at k=16, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
591
// Sweeps m over [1, 4] at n=16, k=16, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(rows).n(16).k(16)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
607
// Sweeps n over [1, 16] at m=4, k=16, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(cols).k(16)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
623
// Sweeps k over [1, 15] at m=4, n=16.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
638
// Sweeps k over [1, 15] at m=4, n=16 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(19)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
654
// Sweeps k over [1, 15], n over [1, 16] and m over [1, 4], with iterations
// set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
674
// Sweeps k over [17, 31] at m=4, n=16.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
689
// Sweeps k over [17, 31] at m=4, n=16 with a_stride set to 37.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(37)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
705
// Sweeps k over [17, 31], n over [1, 16] and m over [1, 4], with iterations
// set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
725
// Sweeps k over 32, 48, ..., 160 (step 16) at m=4, n=16.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
740
// Sweeps k over 32, 48, ..., 160 (step 16) at m=4, n=16 with a_stride set
// to 163.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(163)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
756
// Sweeps k over 32, 48, ..., 160 (step 16), n over [1, 16] and m over [1, 4],
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
776
// Sweeps n over [17, 31] and k over 1, 18, ..., 69 (step 17) at m=4.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
793
// Sweeps n over [17, 31] and k over 1, 18, ..., 69 (step 17) at m=4 with
// cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
811
// Sweeps n over [17, 31] and k over 1, 18, ..., 69 (step 17) at m=4 with
// a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
829
// Sweeps n over [17, 31], k over 1, 18, ..., 69 (step 17) and m over [1, 4],
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
849
// Sweeps n over 32 and 48 (step 16) and k over 1, 18, ..., 69 (step 17) at m=4.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
866
// Sweeps n over 32 and 48 (step 16) and k over 1, 18, ..., 69 (step 17) at
// m=4 with cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
884
// Sweeps n over 32 and 48 (step 16) and k over 1, 18, ..., 69 (step 17) at
// m=4 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(83)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
902
// Sweeps n over 32 and 48 (step 16), k over 1, 18, ..., 69 (step 17) and m
// over [1, 4], with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
922
// Sweeps k over 1, 18, ..., 69 (step 17), n over [1, 16] and m over [1, 4]
// with cm_stride set to 19 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
943
// Single run with m=4, n=16, k=16 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
957
// Single run with m=4, n=16, k=16 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
971
// Single run with m=4, n=16, k=16 and cm_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .cm_stride(19)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
985
// Sweeps k over 1, 18, ..., 69 (step 17) at m=4, n=16 with a_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1001
// Sweeps k over 1, 18, ..., 69 (step 17) at m=4, n=16 with b_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(depth)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1017
TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Sweeps k = 1, 18, 35, 52, 69 with both a_zero_point and b_zero_point at 0.
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
  }
}
1034 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1035
1036
1037 #if XNN_ARCH_ARM
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 1, n = 1, k = 4.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1050
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 1, n = 1, k = 4, with cn_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .cn_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1064
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 1, n = 1, k = 4, with a_stride set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .a_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1078
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // Every (n, m) with n in [1, 1] and m in [1, 1] at k = 4, one iteration each.
  for (uint32_t n = 1; n <= 1; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1096
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  // m in [1, 1] with n = 1 and k = 4, one iteration each.
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(m).n(1).k(4)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1112
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in [1, 1] with m = 1 and k = 4, one iteration each.
  for (uint32_t n = 1; n <= 1; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(n).k(4)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1128
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 2, 3 with m = 1, n = 1.
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1143
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 2, 3 with m = 1, n = 1 and a_stride set to 7.
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .a_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1159
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k = 1, 2, 3: every (n, m) with n in [1, 1] and m in [1, 1],
  // one iteration each.
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1179
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 5, 6, 7 with m = 1, n = 1.
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1194
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 5, 6, 7 with m = 1, n = 1 and a_stride set to 11.
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1210
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k = 5, 6, 7: every (n, m) with n in [1, 1] and m in [1, 1],
  // one iteration each.
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1230
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 8, 12, ..., 40 (step 4) with m = 1, n = 1.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1245
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 8, 12, ..., 40 (step 4) with m = 1, n = 1 and a_stride set to 43.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1261
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k = 8, 12, ..., 40: every (n, m) with n in [1, 1] and m in [1, 1],
  // one iteration each.
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1281
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // Runs n = 2 (greater than nr = 1) for k = 1, 6, 11, 16 with m = 1.
  // An exclusive bound of `n < 2` with n starting at 2 leaves this loop empty,
  // so the test would pass without ever calling the kernel; the inclusive
  // bound makes n = 2 execute.
  // NOTE(review): the file header marks this file as auto-generated, so the
  // same bound presumably needs fixing in the generator template - confirm.
  for (uint32_t n = 2; n <= 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1298
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // Runs n = 2 (greater than nr = 1) for k = 1, 6, 11, 16 with m = 1 and
  // cn_stride set to 3.
  // An exclusive bound of `n < 2` with n starting at 2 leaves this loop empty,
  // so the test would pass without ever calling the kernel; the inclusive
  // bound makes n = 2 execute.
  // NOTE(review): the file header marks this file as auto-generated, so the
  // same bound presumably needs fixing in the generator template - confirm.
  for (uint32_t n = 2; n <= 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(3)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1316
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // Runs n = 2 (greater than nr = 1) for k = 1, 6, 11, 16 with m = 1 and
  // a_stride set to 23.
  // An exclusive bound of `n < 2` with n starting at 2 leaves this loop empty,
  // so the test would pass without ever calling the kernel; the inclusive
  // bound makes n = 2 execute.
  // NOTE(review): the file header marks this file as auto-generated, so the
  // same bound presumably needs fixing in the generator template - confirm.
  for (uint32_t n = 2; n <= 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .a_stride(23)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1334
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // Runs n = 2 (greater than nr = 1) for k = 1, 6, 11, 16 and m in [1, 1],
  // one iteration each.
  // An exclusive bound of `n < 2` with n starting at 2 leaves this loop empty,
  // so the test would pass without ever calling the kernel; the inclusive
  // bound makes n = 2 execute.
  // NOTE(review): the file header marks this file as auto-generated, so the
  // same bound presumably needs fixing in the generator template - confirm.
  for (uint32_t n = 2; n <= 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1354
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 2, 3 crossed with k = 1, 6, 11, 16, with m = 1.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1371
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 2, 3 crossed with k = 1, 6, 11, 16, with m = 1 and cn_stride set to 3.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(3)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1389
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 2, 3 crossed with k = 1, 6, 11, 16, with m = 1 and a_stride set to 23.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .a_stride(23)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1407
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 2, 3 crossed with k = 1, 6, 11, 16 and m in [1, 1],
  // one iteration each.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1427
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k = 1, 6, 11, 16: every (n, m) with n in [1, 1] and m in [1, 1],
  // with cm_stride set to 3, one iteration each.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(3)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1448
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 1, n = 1, k = 4, with qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1462
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 1, n = 1, k = 4, with qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1476
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 1, n = 1, k = 4, with cm_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .cm_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1490
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 6, 11, 16 with a_zero_point forced to 0.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1506
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 6, 11, 16 with b_zero_point forced to 0.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1522
TEST(QU8_GEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 6, 11, 16 with both a_zero_point and b_zero_point at 0.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1539 #endif // XNN_ARCH_ARM
1540
1541
1542 #if XNN_ARCH_ARM
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 2, n = 1, k = 4.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1555
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 2, n = 1, k = 4, with cn_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cn_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1569
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 2, n = 1, k = 4, with a_stride set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .a_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1583
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // Every (n, m) with n in [1, 1] and m in [1, 2] at k = 4, one iteration each.
  for (uint32_t n = 1; n <= 1; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(1).kr(4).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1601
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  // m in [1, 2] with n = 1 and k = 4, one iteration each.
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(m).n(1).k(4)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1617
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in [1, 1] with m = 2 and k = 4, one iteration each.
  for (uint32_t n = 1; n <= 1; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(n).k(4)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1633
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 2, 3 with m = 2, n = 1.
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1648
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 2, 3 with m = 2, n = 1 and a_stride set to 7.
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .a_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1664
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k = 1, 2, 3: every (n, m) with n in [1, 1] and m in [1, 2],
  // one iteration each.
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1684
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 5, 6, 7 with m = 2, n = 1.
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1699
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 5, 6, 7 with m = 2, n = 1 and a_stride set to 11.
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1715
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k = 5, 6, 7: every (n, m) with n in [1, 1] and m in [1, 2],
  // one iteration each.
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1735
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 8, 12, ..., 40 (step 4) with m = 2, n = 1.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1750
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 8, 12, ..., 40 (step 4) with m = 2, n = 1 and a_stride set to 43.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1766
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k = 8, 12, ..., 40: every (n, m) with n in [1, 1] and m in [1, 2],
  // one iteration each.
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1786
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // Runs n = 2 (greater than nr = 1) for k = 1, 6, 11, 16 with m = 2.
  // An exclusive bound of `n < 2` with n starting at 2 leaves this loop empty,
  // so the test would pass without ever calling the kernel; the inclusive
  // bound makes n = 2 execute.
  // NOTE(review): the file header marks this file as auto-generated, so the
  // same bound presumably needs fixing in the generator template - confirm.
  for (uint32_t n = 2; n <= 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1803
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // Runs n = 2 (greater than nr = 1) for k = 1, 6, 11, 16 with m = 2 and
  // cn_stride set to 3.
  // An exclusive bound of `n < 2` with n starting at 2 leaves this loop empty,
  // so the test would pass without ever calling the kernel; the inclusive
  // bound makes n = 2 execute.
  // NOTE(review): the file header marks this file as auto-generated, so the
  // same bound presumably needs fixing in the generator template - confirm.
  for (uint32_t n = 2; n <= 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(3)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1821
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // Runs n = 2 (greater than nr = 1) for k = 1, 6, 11, 16 with m = 2 and
  // a_stride set to 23.
  // An exclusive bound of `n < 2` with n starting at 2 leaves this loop empty,
  // so the test would pass without ever calling the kernel; the inclusive
  // bound makes n = 2 execute.
  // NOTE(review): the file header marks this file as auto-generated, so the
  // same bound presumably needs fixing in the generator template - confirm.
  for (uint32_t n = 2; n <= 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .a_stride(23)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1839
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // Runs n = 2 (greater than nr = 1) for k = 1, 6, 11, 16 and m in [1, 2],
  // one iteration each.
  // An exclusive bound of `n < 2` with n starting at 2 leaves this loop empty,
  // so the test would pass without ever calling the kernel; the inclusive
  // bound makes n = 2 execute.
  // NOTE(review): the file header marks this file as auto-generated, so the
  // same bound presumably needs fixing in the generator template - confirm.
  for (uint32_t n = 2; n <= 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1859
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 2, 3 crossed with k = 1, 6, 11, 16, with m = 2.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1876
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 2, 3 crossed with k = 1, 6, 11, 16, with m = 2 and cn_stride set to 3.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(3)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1894
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_a) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 2, 3 crossed with k = 1, 6, 11, 16, with m = 2 and a_stride set to 23.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .a_stride(23)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1912
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 2, 3 crossed with k = 1, 6, 11, 16 and m in [1, 2],
  // one iteration each.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1932
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k = 1, 6, 11, 16: every (n, m) with n in [1, 1] and m in [1, 2],
  // with cm_stride set to 3, one iteration each.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(3)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1953
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 2, n = 1, k = 4, with qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1967
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 2, n = 1, k = 4, with qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1981
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  // One case: m = 2, n = 1, k = 4, with cm_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cm_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1995
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 6, 11, 16 with a_zero_point forced to 0.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
2011
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 6, 11, 16 with b_zero_point forced to 0.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
2027
TEST(QU8_GEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweeps k = 1, 6, 11, 16 with both a_zero_point and b_zero_point at 0.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
2044 #endif // XNN_ARCH_ARM
2045
2046
2047 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // One case: m = 1, n = 16, k = 8.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
}
2060
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // One case: m = 1, n = 16, k = 8, with cn_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
}
2074
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // One case: m = 1, n = 16, k = 8, with a_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
}
2088
// k = 8 over every (n, m) with n in [1, 16] and m in [1, 1]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(8)
          .iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2106
// k = 8, n = 16, sweeping m over [1, 1]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 1; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(m).n(16).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2122
// k = 8, m = 1, sweeping n over [1, 16]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2138
// Full 1x16 tile for every k in [1, 8).
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2153
// Full 1x16 tile for every k in [1, 8), with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k)
        .a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2169
// Every k in [1, 8) crossed with n in [1, 16] and m in [1, 1]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2189
// Full 1x16 tile for every k in [9, 16).
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2204
// Full 1x16 tile for every k in [9, 16), with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k)
        .a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2220
// Every k in [9, 16) crossed with n in [1, 16] and m in [1, 1]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2240
// Full 1x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2255
// Full 1x16 tile for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k)
        .a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2271
// k = 16, 24, ..., 80 crossed with n in [1, 16] and m in [1, 1]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2291
// m = 1 with n in [17, 32) and k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n).k(k);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2308
// m = 1 with n in [17, 32) and k = 1, 10, 19, 28, 37; cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(19);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2326
// m = 1 with n in [17, 32) and k = 1, 10, 19, 28, 37; a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2344
// n in [17, 32), k = 1, 10, 19, 28, 37, m in [1, 1]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2364
// m = 1 with n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n).k(k);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2381
// m = 1 with n = 32, 48 and k = 1, 10, 19, 28, 37; cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(19);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2399
// m = 1 with n = 32, 48 and k = 1, 10, 19, 28, 37; a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2417
// n = 32, 48, k = 1, 10, 19, 28, 37, m in [1, 1]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2437
// k = 1, 10, 19, 28, 37 crossed with n in [1, 16] and m in [1, 1];
// cm_stride set to 19 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2458
// Full 1x16 tile, k = 8, with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
2472
// Full 1x16 tile, k = 8, with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
2486
// Full 1x16 tile, k = 8, with cm_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
2500
// Full 1x16 tile for k = 1, 10, 19, 28, 37 with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k)
        .a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2516
// Full 1x16 tile for k = 1, 10, 19, 28, 37 with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2532
// Full 1x16 tile for k = 1, 10, 19, 28, 37 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k)
        .a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2549 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
2550
2551
2552 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x16 NEON MLAL-lane kernel on a full 4x16 tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
2565
// Full 4x16 tile, k = 8, with cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
2579
// Full 4x16 tile, k = 8, with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
2593
// k = 8 over every (n, m) with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2611
// k = 8, n = 16, sweeping m over [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m).n(16).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2627
// k = 8, m = 4, sweeping n over [1, 16]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2643
// Full 4x16 tile for every k in [1, 8).
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2658
// Full 4x16 tile for every k in [1, 8), with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2674
// Every k in [1, 8) crossed with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2694
// Full 4x16 tile for every k in [9, 16).
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2709
// Full 4x16 tile for every k in [9, 16), with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2725
// Every k in [9, 16) crossed with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2745
// Full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2760
// Full 4x16 tile for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2776
// k = 16, 24, ..., 80 crossed with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2796
// m = 4 with n in [17, 32) and k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2813
// m = 4 with n in [17, 32) and k = 1, 10, 19, 28, 37; cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2831
// m = 4 with n in [17, 32) and k = 1, 10, 19, 28, 37; a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2849
// n in [17, 32), k = 1, 10, 19, 28, 37, m in [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2869
// m = 4 with n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2886
// m = 4 with n = 32, 48 and k = 1, 10, 19, 28, 37; cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2904
// m = 4 with n = 32, 48 and k = 1, 10, 19, 28, 37; a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2922
// n = 32, 48, k = 1, 10, 19, 28, 37, m in [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2942
// k = 1, 10, 19, 28, 37 crossed with n in [1, 16] and m in [1, 4];
// cm_stride set to 19 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
2963
// Full 4x16 tile, k = 8, with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
2977
// Full 4x16 tile, k = 8, with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
2991
// Full 4x16 tile, k = 8, with cm_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
3005
// Full 4x16 tile for k = 1, 10, 19, 28, 37 with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
3021
// Full 4x16 tile for k = 1, 10, 19, 28, 37 with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
3037
// Full 4x16 tile for k = 1, 10, 19, 28, 37 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
3054 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3055
3056
3057 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 SSE2 (ld64) kernel on a full 3x4 tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
3070
// Full 3x4 tile, k = 8, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
3084
// Full 3x4 tile, k = 8, with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
3098
// k = 8 over every (n, m) with n in [1, 4] and m in [1, 3]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
3116
// k = 8, n = 4, sweeping m over [1, 3]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 3; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
3132
// k = 8, m = 3, sweeping n over [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
3148
// Full 3x4 tile for every k in [1, 8).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
3163
// Sweeps k over [1, 7] at m = 3, n = 4 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).a_stride(11);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3179
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3199
// Sweeps k over [9, 15] at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3214
// Sweeps k over [9, 15] at m = 3, n = 4 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).a_stride(19);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3230
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3250
// Sweeps k over 16, 24, ..., 80 at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3265
// Sweeps k over 16, 24, ..., 80 at m = 3, n = 4 with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).a_stride(83);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3281
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3301
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3318
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 at m = 3 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3336
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 at m = 3 with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3354
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3374
// Sweeps n over 8, 12 and k over 1, 10, ..., 37 at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3391
// Sweeps n over 8, 12 and k over 1, 10, ..., 37 at m = 3 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3409
// Sweeps n over 8, 12 and k over 1, 10, ..., 37 at m = 3 with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3427
// Sweeps n over 8, 12, k over 1, 10, ..., 37 and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3447
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 3] with cm_stride(7) and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3468
// Runs the 3x4c2 SSE2 LD64 kernel at m = 3, n = 4, k = 8 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3482
// Runs the 3x4c2 SSE2 LD64 kernel at m = 3, n = 4, k = 8 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3496
// Runs the 3x4c2 SSE2 LD64 kernel at m = 3, n = 4, k = 8 with cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3510
// Sweeps k over 1, 10, ..., 37 at m = 3, n = 4 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).a_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3526
// Sweeps k over 1, 10, ..., 37 at m = 3, n = 4 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3542
// Sweeps k over 1, 10, ..., 37 at m = 3, n = 4 with a_zero_point(0) and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3559 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3560
3561
3562 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c2 SSE4.1 LD64 kernel at m = 3, n = 4, k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3575
// Runs the 3x4c2 SSE4.1 LD64 kernel at m = 3, n = 4, k = 8 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3589
// Runs the 3x4c2 SSE4.1 LD64 kernel at m = 3, n = 4, k = 8 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3603
// Sweeps n over [1, 4] and m over [1, 3] at k = 8, each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3621
// Sweeps m over [1, 3] at n = 4, k = 8, each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8).iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3637
// Sweeps n over [1, 4] at m = 3, k = 8, each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_size).k(8).iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3653
// Sweeps k over [1, 7] at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3668
// Sweeps k over [1, 7] at m = 3, n = 4 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).a_stride(11);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3684
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3704
// Sweeps k over [9, 15] at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3719
// Sweeps k over [9, 15] at m = 3, n = 4 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).a_stride(19);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3735
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3755
// Sweeps k over 16, 24, ..., 80 at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3770
// Sweeps k over 16, 24, ..., 80 at m = 3, n = 4 with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).a_stride(83);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3786
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3806
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3823
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 at m = 3 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3841
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 at m = 3 with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3859
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3879
// Sweeps n over 8, 12 and k over 1, 10, ..., 37 at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3896
// Sweeps n over 8, 12 and k over 1, 10, ..., 37 at m = 3 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3914
// Sweeps n over 8, 12 and k over 1, 10, ..., 37 at m = 3 with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3932
// Sweeps n over 8, 12, k over 1, 10, ..., 37 and m over [1, 3], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3952
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 3] with cm_stride(7) and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3973
// Runs the 3x4c2 SSE4.1 LD64 kernel at m = 3, n = 4, k = 8 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3987
// Runs the 3x4c2 SSE4.1 LD64 kernel at m = 3, n = 4, k = 8 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4001
// Runs the 3x4c2 SSE4.1 LD64 kernel at m = 3, n = 4, k = 8 with cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4015
// Sweeps k over 1, 10, ..., 37 at m = 3, n = 4 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).a_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4031
// Sweeps k over 1, 10, ..., 37 at m = 3, n = 4 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4047
// Sweeps k over 1, 10, ..., 37 at m = 3, n = 4 with a_zero_point(0) and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4064 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4065
4066
4067 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 2x4c2 AVX LD64 kernel at m = 2, n = 4, k = 8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4080
// Runs the 2x4c2 AVX LD64 kernel at m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4094
// Runs the 2x4c2 AVX LD64 kernel at m = 2, n = 4, k = 8 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4108
// Sweeps n over [1, 4] and m over [1, 2] at k = 8, each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4126
// Sweeps m over [1, 2] at n = 4, k = 8, each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8).iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4142
// Sweeps n over [1, 4] at m = 2, k = 8, each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n_size).k(8).iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4158
// Sweeps k over [1, 7] at m = 2, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4173
// Sweeps k over [1, 7] at m = 2, n = 4 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size).a_stride(11);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4189
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], each with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4209
// Sweeps k over [9, 15] at m = 2, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4224
// Sweeps k over [9, 15] at m = 2, n = 4 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size).a_stride(19);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4240
// Sweeps k over [9, 15] and, for each k, every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4260
// Sweeps k over the multiples of 8 from 16 through 80 on a full 2x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4275
// Sweeps k over the multiples of 8 from 16 through 80 on a full 2x4 tile with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk).a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4291
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k, every (n, m)
// with n in [1, 4] and m in [1, 2]; each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4311
// Sweeps n over [5, 7] (above nr = 4) with m = 2, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4328
// Sweeps n over [5, 7] with m = 2 and cn_stride set to 7, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4346
// Sweeps n over [5, 7] with m = 2 and a_stride set to 43, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk).a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4364
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 2];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4384
// Runs n = 8 and n = 12 (multiples of nr = 4) with m = 2, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4401
// Runs n = 8 and n = 12 with m = 2 and cn_stride set to 7, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4419
// Runs n = 8 and n = 12 with m = 2 and a_stride set to 43, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk).a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4437
// Runs n = 8 and n = 12, k over 1, 10, ..., 37 and m over [1, 2];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4457
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 2] with cm_stride set to 7;
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4478
// Single run on a full 2x4 tile with k = 8 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4492
// Single run on a full 2x4 tile with k = 8 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4506
// Single run on a full 2x4 tile with k = 8 and cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4520
// Sweeps k over 1, 10, ..., 37 on a full 2x4 tile with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk).a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4536
// Sweeps k over 1, 10, ..., 37 on a full 2x4 tile with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4552
// Sweeps k over 1, 10, ..., 37 on a full 2x4 tile with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4569 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4570
4571
4572 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a full 2x4 tile (m == mr, n == nr) with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4585
// Single run on a full 2x4 tile with k = 8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4599
// Single run on a full 2x4 tile with k = 8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4613
// k = 8 over every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(mm).n(nn).k(8).iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4631
// k = 8 and n = 4 while m sweeps [1, 2]; each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(mm).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4647
// k = 8 and m = 2 while n sweeps [1, 4]; each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(nn).k(8).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4663
// Sweeps k over [1, 7] on a full 2x4 tile (m == mr, n == nr).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4678
// Sweeps k over [1, 7] on a full 2x4 tile with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk).a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4694
// Sweeps k over [1, 7] and, for each k, every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4714
// Sweeps k over [9, 15] on a full 2x4 tile (m == mr, n == nr).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4729
// Sweeps k over [9, 15] on a full 2x4 tile with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk).a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4745
// Sweeps k over [9, 15] and, for each k, every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4765
// Sweeps k over the multiples of 8 from 16 through 80 on a full 2x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4780
// Sweeps k over the multiples of 8 from 16 through 80 on a full 2x4 tile with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk).a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4796
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k, every (n, m)
// with n in [1, 4] and m in [1, 2]; each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4816
// Sweeps n over [5, 7] (above nr = 4) with m = 2, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4833
// Sweeps n over [5, 7] with m = 2 and cn_stride set to 7, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4851
// Sweeps n over [5, 7] with m = 2 and a_stride set to 43, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk).a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4869
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 2];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4889
// Runs n = 8 and n = 12 (multiples of nr = 4) with m = 2, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4906
// Runs n = 8 and n = 12 with m = 2 and cn_stride set to 7, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4924
// Runs n = 8 and n = 12 with m = 2 and a_stride set to 43, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nn).k(kk).a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4942
// Runs n = 8 and n = 12, k over 1, 10, ..., 37 and m over [1, 2];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4962
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 2] with cm_stride set to 7;
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4983
// Single run on a full 2x4 tile with k = 8 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4997
// Single run on a full 2x4 tile with k = 8 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5011
// Single run on a full 2x4 tile with k = 8 and cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5025
// Sweeps k over 1, 10, ..., 37 on a full 2x4 tile with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk).a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5041
// Sweeps k over 1, 10, ..., 37 on a full 2x4 tile with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5057
// Sweeps k over 1, 10, ..., 37 on a full 2x4 tile with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kk);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5074 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5075
5076
5077 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a full 3x4 tile (m == mr, n == nr) with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5090
// Single run on a full 3x4 tile with k = 8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5104
// Single run on a full 3x4 tile with k = 8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5118
// k = 8 over every (n, m) with n in [1, 4] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(mm).n(nn).k(8).iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5136
// k = 8 and n = 4 while m sweeps [1, 3]; each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(mm).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5152
// k = 8 and m = 3 while n sweeps [1, 4]; each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(nn).k(8).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5168
// Sweeps k over [1, 7] on a full 3x4 tile (m == mr, n == nr).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kk);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5183
// Sweeps k over [1, 7] on a full 3x4 tile with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kk).a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5199
// Sweeps k over [1, 7] and, for each k, every (n, m) with n in [1, 4] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5219
// Sweeps k over [9, 15] on a full 3x4 tile (m == mr, n == nr).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kk);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5234
// Sweeps k over [9, 15] on a full 3x4 tile with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kk).a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5250
// Sweeps k over [9, 15] and, for each k, every (n, m) with n in [1, 4] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5270
// Sweeps k over the multiples of 8 from 16 through 80 on a full 3x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kk);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5285
// Sweeps k over the multiples of 8 from 16 through 80 on a full 3x4 tile with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kk).a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5301
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k, every (n, m)
// with n in [1, 4] and m in [1, 3]; each configuration is run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5321
// Sweeps n over [5, 7] (above nr = 4) with m = 3, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nn).k(kk);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5338
// Sweeps n over [5, 7] with m = 3 and cn_stride set to 7, and k over 1, 10, ..., 37 for each n.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nn).k(kk).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5356
// m = 3, a_stride = 43; n in 5..7; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5374
// n in 5..7; k in {1, 10, 19, 28, 37}; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5394
// m = 3; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5411
// m = 3, cn_stride = 7; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5429
// m = 3, a_stride = 43; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5447
// n in {8, 12}; k in {1, 10, 19, 28, 37}; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5467
// cm_stride = 7; k in {1, 10, 19, 28, 37}; n in 1..4; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5488
// Single case: m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5502
// Single case: m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5516
// Single case: m = 3, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5530
// m = 3, n = 4, a_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5546
// m = 3, n = 4, b_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5562
// m = 3, n = 4, a_zero_point = 0 and b_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5579 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5580
5581
5582 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 3, n = 4, k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5595
// Single case: m = 3, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5609
// Single case: m = 3, n = 4, k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5623
// k = 8; every (m, n) with n in 1..4 and m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5641
// n = 4, k = 8; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mm).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5657
// m = 3, k = 8; n in 1..4; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5673
// m = 3, n = 4; k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5688
// m = 3, n = 4, a_stride = 11; k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5704
// k in 1..7; n in 1..4; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5724
// m = 3, n = 4; k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5739
// m = 3, n = 4, a_stride = 19; k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5755
// k in 9..15; n in 1..4; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5775
// m = 3, n = 4; k takes the values 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5790
// m = 3, n = 4, a_stride = 83; k takes the values 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5806
// k = 16, 24, ..., 80; n in 1..4; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5826
// m = 3; n in 5..7; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5843
// m = 3, cn_stride = 7; n in 5..7; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5861
// m = 3, a_stride = 43; n in 5..7; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5879
// n in 5..7; k in {1, 10, 19, 28, 37}; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5899
// m = 3; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5916
// m = 3, cn_stride = 7; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5934
// m = 3, a_stride = 43; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nn).k(kk)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5952
// n in {8, 12}; k in {1, 10, 19, 28, 37}; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5972
// cm_stride = 7; k in {1, 10, 19, 28, 37}; n in 1..4; m in 1..3; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5993
// Single case: m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6007
// Single case: m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6021
// Single case: m = 3, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6035
// m = 3, n = 4, a_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6051
// m = 3, n = 4, b_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6067
// m = 3, n = 4, a_zero_point = 0 and b_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6084 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6085
6086
6087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 4, n = 4, k = 8.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6100
// Single case: m = 4, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6114
// Single case: m = 4, n = 4, k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6128
// k = 8; every (m, n) with n in 1..4 and m in 1..4; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6146
// n = 4, k = 8; m in 1..4; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mm).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6162
// m = 4, k = 8; n in 1..4; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nn).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6178
// m = 4, n = 4; k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kk)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6193
// m = 4, n = 4, a_stride = 11; k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kk)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6209
// k in 1..7; n in 1..4; m in 1..4; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6229
// m = 4, n = 4; k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kk)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6244
// m = 4, n = 4, a_stride = 19; k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kk)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6260
// k in 9..15; n in 1..4; m in 1..4; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6280
// m = 4, n = 4; k takes the values 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kk)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6295
// m = 4, n = 4, a_stride = 83; k takes the values 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kk)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6311
// k = 16, 24, ..., 80; n in 1..4; m in 1..4; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6331
// m = 4; n in 5..7; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nn).k(kk)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6348
// m = 4, cn_stride = 7; n in 5..7; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6366
// m = 4, a_stride = 43; n in 5..7; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nn).k(kk)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6384
// n in 5..7; k in {1, 10, 19, 28, 37}; m in 1..4; iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6404
// m = 4; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nn).k(kk)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6421
// m = 4, cn_stride = 7; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6439
// m = 4, a_stride = 43; n in {8, 12}; k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nn).k(kk)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6457
// Sweeps n in {8, 12}, k in {1, 10, ..., 37} and m over 1..4; every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6477
// Sweeps k in {1, 10, ..., 37}, n over 1..4 and m over 1..4 with cm_stride set to 7 and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6498
// Single 4x4 run at k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6512
// Single 4x4 run at k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6526
// Single 4x4 run at k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6540
// Runs the 4x4 shape for k in {1, 10, ..., 37} with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6556
// Runs the 4x4 shape for k in {1, 10, ..., 37} with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6572
// Runs the 4x4 shape for k in {1, 10, ..., 37} with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6589 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6590
6591
6592 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 4x4 run at k = 8.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6605
// Single 4x4 run at k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6619
// Single 4x4 run at k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6633
// k = 8 with every n in 1..4 and m in 1..4; every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6651
// k = 8, n = 4, with m swept over 1..4 and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6667
// k = 8, m = 4, with n swept over 1..4 and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6683
// Runs the 4x4 shape for every k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6698
// Runs the 4x4 shape for every k in 1..7 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_stride(11);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6714
// Sweeps k over 1..7, n over 1..4 and m over 1..4; every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6734
// Runs the 4x4 shape for every k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6749
// Runs the 4x4 shape for every k in 9..15 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_stride(19);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6765
// Sweeps k over 9..15, n over 1..4 and m over 1..4; every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6785
// Runs the 4x4 shape for k = 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6800
// Runs the 4x4 shape for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_stride(83);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6816
// Sweeps k = 16, 24, ..., 80, n over 1..4 and m over 1..4; every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6836
// Runs m = 4 with n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6853
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, ..., 37}) with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6871
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, ..., 37}) with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6889
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..4; every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6909
// Runs m = 4 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6926
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, ..., 37}) with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6944
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, ..., 37}) with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6962
// Sweeps n in {8, 12}, k in {1, 10, ..., 37} and m over 1..4; every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6982
// Sweeps k in {1, 10, ..., 37}, n over 1..4 and m over 1..4 with cm_stride set to 7 and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7003
// Single 4x4 run at k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7017
// Single 4x4 run at k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7031
// Single 4x4 run at k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7045
// Runs the 4x4 shape for k in {1, 10, ..., 37} with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7061
// Runs the 4x4 shape for k in {1, 10, ..., 37} with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7077
// Runs the 4x4 shape for k in {1, 10, ..., 37} with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7094 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7095
7096
7097 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 1x4 run at k = 8.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7110
// Single 1x4 run at k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7124
// Single 1x4 run at k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7138
// k = 8 with n swept over 1..4; the m loop covers only m = 1. Every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7156
// k = 8, n = 4; the m loop covers only m = 1. Runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7172
// k = 8, m = 1, with n swept over 1..4 and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7188
// Runs the 1x4 shape for every k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7203
// Runs the 1x4 shape for every k in 1..7 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_stride(11);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7219
// Sweeps k over 1..7 and n over 1..4; the m loop covers only m = 1. Every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7239
// Runs the 1x4 shape for every k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7254
// Runs the 1x4 shape for every k in 9..15 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_stride(19);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7270
// Sweeps k over 9..15 and n over 1..4; the m loop covers only m = 1. Every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7290
// Runs the 1x4 shape for k = 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7305
// Runs the 1x4 shape for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_stride(83);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7321
// Sweeps k = 16, 24, ..., 80 and n over 1..4; the m loop covers only m = 1. Every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7341
// Runs m = 1 with n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7358
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, ..., 37}) with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7376
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, ..., 37}) with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7394
// Sweeps n over 5..7 and k over 1, 10, ..., 37; the m loop covers only m = 1. Every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7414
// Runs m = 1 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7431
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, ..., 37}) with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7449
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, ..., 37}) with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7467
// Sweeps n in {8, 12} and k in {1, 10, ..., 37}; the m loop covers only m = 1. Every combination runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7487
// Sweeps k in {1, 10, ..., 37} and n over 1..4 (the m loop covers only m = 1) with cm_stride set to 7 and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7508
// Single 1x4 run at k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7522
// Single 1x4 run at k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7536
// Single 1x4 run at k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7550
// 1x4c2 SSE2 LD128 kernel on a 1x4 tile with a_zero_point(0),
// k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7566
// 1x4c2 SSE2 LD128 kernel on a 1x4 tile with b_zero_point(0),
// k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7582
// 1x4c2 SSE2 LD128 kernel on a 1x4 tile with both a_zero_point(0) and
// b_zero_point(0), k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7599 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7600
7601
7602 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
7615
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with k = 8 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
7629
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with k = 8 and a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
7643
// 2x4c2 SSE2 LD128 kernel with k = 8 over every n = 1..4 and m = 1..2,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
7661
// 2x4c2 SSE2 LD128 kernel with k = 8, n = 4 and m = 1..2,
// each run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7677
// 2x4c2 SSE2 LD128 kernel with k = 8, m = 2 and n = 1..4,
// each run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7693
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile for every k = 1..7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7708
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile for every k = 1..7 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7724
// 2x4c2 SSE2 LD128 kernel over k = 1..7, n = 1..4, m = 1..2,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
7744
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile for every k = 9..15.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7759
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile for every k = 9..15 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7775
// 2x4c2 SSE2 LD128 kernel over k = 9..15, n = 1..4, m = 1..2,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
7795
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile for k = 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7810
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile for k = 16, 24, ..., 80 with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
7826
// 2x4c2 SSE2 LD128 kernel over k = 16, 24, ..., 80, n = 1..4, m = 1..2,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
7846
// 2x4c2 SSE2 LD128 kernel with m = 2, n = 5..7, k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
7863
// 2x4c2 SSE2 LD128 kernel with m = 2, n = 5..7, k stepping 1..40 by 9,
// and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
7881
// 2x4c2 SSE2 LD128 kernel with m = 2, n = 5..7, k stepping 1..40 by 9,
// and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
7899
// 2x4c2 SSE2 LD128 kernel over n = 5..7, k stepping 1..40 by 9, m = 1..2,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
7919
// 2x4c2 SSE2 LD128 kernel with m = 2, n in {8, 12}, k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
7936
// 2x4c2 SSE2 LD128 kernel with m = 2, n in {8, 12}, k stepping 1..40 by 9,
// and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
7954
// 2x4c2 SSE2 LD128 kernel with m = 2, n in {8, 12}, k stepping 1..40 by 9,
// and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
7972
// 2x4c2 SSE2 LD128 kernel over n in {8, 12}, k stepping 1..40 by 9, m = 1..2,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
7992
// 2x4c2 SSE2 LD128 kernel with cm_stride(7): k stepping 1..40 by 9,
// n = 1..4, m = 1..2, each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8013
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with k = 8 and qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8027
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with k = 8 and qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8041
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with k = 8 and cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8055
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with a_zero_point(0),
// k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8071
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with b_zero_point(0),
// k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8087
// 2x4c2 SSE2 LD128 kernel on a 2x4 tile with both a_zero_point(0) and
// b_zero_point(0), k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8104 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8105
8106
8107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8120
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8134
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8 and a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8148
// 3x4c2 SSE4.1 LD128 kernel with k = 8 over every n = 1..4 and m = 1..3,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8166
// 3x4c2 SSE4.1 LD128 kernel with k = 8, n = 4 and m = 1..3,
// each run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8182
// 3x4c2 SSE4.1 LD128 kernel with k = 8, m = 3 and n = 1..4,
// each run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8198
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile for every k = 1..7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8213
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile for every k = 1..7 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8229
// 3x4c2 SSE4.1 LD128 kernel over k = 1..7, n = 1..4, m = 1..3,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8249
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile for every k = 9..15.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8264
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile for every k = 9..15 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8280
// 3x4c2 SSE4.1 LD128 kernel over k = 9..15, n = 1..4, m = 1..3,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8300
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile for k = 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8315
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile for k = 16, 24, ..., 80 with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8331
// 3x4c2 SSE4.1 LD128 kernel over k = 16, 24, ..., 80, n = 1..4, m = 1..3,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8351
// 3x4c2 SSE4.1 LD128 kernel with m = 3, n = 5..7, k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8368
// 3x4c2 SSE4.1 LD128 kernel with m = 3, n = 5..7, k stepping 1..40 by 9,
// and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8386
// 3x4c2 SSE4.1 LD128 kernel with m = 3, n = 5..7, k stepping 1..40 by 9,
// and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8404
// 3x4c2 SSE4.1 LD128 kernel over n = 5..7, k stepping 1..40 by 9, m = 1..3,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8424
// 3x4c2 SSE4.1 LD128 kernel with m = 3, n in {8, 12}, k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8441
// 3x4c2 SSE4.1 LD128 kernel with m = 3, n in {8, 12}, k stepping 1..40 by 9,
// and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8459
// 3x4c2 SSE4.1 LD128 kernel with m = 3, n in {8, 12}, k stepping 1..40 by 9,
// and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8477
// 3x4c2 SSE4.1 LD128 kernel over n in {8, 12}, k stepping 1..40 by 9, m = 1..3,
// each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8497
// 3x4c2 SSE4.1 LD128 kernel with cm_stride(7): k stepping 1..40 by 9,
// n = 1..4, m = 1..3, each combination run with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8518
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8 and qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8532
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8 and qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8546
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8 and cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8560
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with a_zero_point(0),
// k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8576
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with b_zero_point(0),
// k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8592
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with both a_zero_point(0) and
// b_zero_point(0), k stepping 1..40 by 9.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8609 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8610
8611
8612 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 SSE2 LD128 kernel on a 4x4 tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8625
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with k = 8 and cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8639
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with k = 8 and a_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8653
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps n over 1..4 (outer) and m over 1..4 (inner) at k = 8, one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8671
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps m over 1..4 with n = 4 and k = 8, one iteration per shape.
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8687
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps n over 1..4 with m = 4 and k = 8, one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8703
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for every k in 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8718
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for every k in 1..7, with a_stride set to 11.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8734
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For every k in 1..7, sweeps n over 1..4 and m over 1..4, one iteration per shape.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8754
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for every k in 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8769
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for every k in 9..15, with a_stride set to 19.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8785
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For every k in 9..15, sweeps n over 1..4 and m over 1..4, one iteration per shape.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8805
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for k = 16, 24, ..., 80 (step 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8820
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for k = 16, 24, ..., 80 (step 8), with a_stride set to 83.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8836
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For k = 16, 24, ..., 80, sweeps n over 1..4 and m over 1..4, one iteration per shape.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8856
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n in 5..7 with m = 4, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8873
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n in 5..7 with m = 4 and cn_stride set to 7, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8891
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // n in 5..7 with m = 4 and a_stride set to 43, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8909
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n in 5..7, k = 1, 10, 19, 28, 37, and m over 1..4; one iteration per shape.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8929
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 with m = 4, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8946
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 with m = 4 and cn_stride set to 7, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8964
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 with m = 4 and a_stride set to 43, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8982
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12, k = 1, 10, 19, 28, 37, and m over 1..4; one iteration per shape.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9002
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, 19, 28, 37 with n over 1..4 and m over 1..4, cm_stride set to 7;
  // one iteration per shape.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9023
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with k = 8 and qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9037
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with k = 8 and qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9051
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with k = 8 and cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9065
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9081
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9097
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with both zero points set to 0, for k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9114 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9115
9116
9117 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with k = 8.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9130
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with k = 8 and cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9144
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with k = 8 and a_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9158
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Sweeps n over 1..4 (outer) and m over 1..4 (inner) at k = 8, one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9176
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // Sweeps m over 1..4 with n = 4 and k = 8, one iteration per shape.
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9192
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // Sweeps n over 1..4 with m = 4 and k = 8, one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9208
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile for every k in 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9223
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile for every k in 1..7, with a_stride set to 11.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9239
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // For every k in 1..7, sweeps n over 1..4 and m over 1..4, one iteration per shape.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9259
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile for every k in 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9274
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile for every k in 9..15, with a_stride set to 19.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9290
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // For every k in 9..15, sweeps n over 1..4 and m over 1..4, one iteration per shape.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9310
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile for k = 16, 24, ..., 80 (step 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9325
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile for k = 16, 24, ..., 80 (step 8), with a_stride set to 83.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9341
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // For k = 16, 24, ..., 80, sweeps n over 1..4 and m over 1..4, one iteration per shape.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9361
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 with m = 4, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9378
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 with m = 4 and cn_stride set to 7, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9396
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 with m = 4 and a_stride set to 43, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9414
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7, k = 1, 10, 19, 28, 37, and m over 1..4; one iteration per shape.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9434
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 with m = 4, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9451
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 with m = 4 and cn_stride set to 7, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9469
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 with m = 4 and a_stride set to 43, each for k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9487
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12, k = 1, 10, 19, 28, 37, and m over 1..4; one iteration per shape.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9507
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, 19, 28, 37 with n over 1..4 and m over 1..4, cm_stride set to 7;
  // one iteration per shape.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9528
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with k = 8 and qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9542
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with k = 8 and qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9556
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with k = 8 and cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9570
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9586
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9602
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Full 4x4 tile with both zero points set to 0, for k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9619 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9620
9621
9622 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 1x4 tile with k = 8.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9635
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Full 1x4 tile with k = 8 and cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9649
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // Full 1x4 tile with k = 8 and a_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9663
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Sweeps n over 1..4 at k = 8; the m loop covers only m = 1. One iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9681
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // The m loop covers only m = 1, with n = 4 and k = 8; one iteration.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9697
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // Sweeps n over 1..4 with m = 1 and k = 8, one iteration per shape.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9713
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for each k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9728
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for each k in 1..7, with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9744
// 1x4c2 XOP ld128 ukernel for each k in 1..7, n in 1..4 and m in 1..1,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9764
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for each k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9779
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for each k in 9..15, with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9795
// 1x4c2 XOP ld128 ukernel for each k in 9..15, n in 1..4 and m in 1..1,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9815
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for k = 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9830
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9846
// 1x4c2 XOP ld128 ukernel for k = 16, 24, ..., 80, n in 1..4 and m in 1..1,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9866
// 1x4c2 XOP ld128 ukernel at m = 1 for n in 5..7 and k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9883
// 1x4c2 XOP ld128 ukernel at m = 1 for n in 5..7 and k = 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9901
// 1x4c2 XOP ld128 ukernel at m = 1 for n in 5..7 and k = 1, 10, 19, 28, 37,
// with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9919
// 1x4c2 XOP ld128 ukernel for n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..1,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9939
// 1x4c2 XOP ld128 ukernel at m = 1 for n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9956
// 1x4c2 XOP ld128 ukernel at m = 1 for n = 8, 12 and k = 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9974
// 1x4c2 XOP ld128 ukernel at m = 1 for n = 8, 12 and k = 1, 10, 19, 28, 37,
// with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9992
// 1x4c2 XOP ld128 ukernel for n = 8, 12, k = 1, 10, 19, 28, 37 and m in 1..1,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10012
// 1x4c2 XOP ld128 ukernel for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// with cm_stride set to 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10033
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4, k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10047
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4, k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10061
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10075
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for k = 1, 10, 19, 28, 37,
// with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10091
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for k = 1, 10, 19, 28, 37,
// with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10107
// 1x4c2 XOP ld128 ukernel at m = 1, n = 4 for k = 1, 10, 19, 28, 37,
// with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10124 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10125
10126
10127 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4, k = 8 with no further settings.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10140
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10154
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4, k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10168
// 2x4c2 XOP ld128 ukernel at k = 8 for every (m, n) with n in 1..4 and m in 1..2,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10186
// 2x4c2 XOP ld128 ukernel at n = 4, k = 8 for m in 1..2, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10202
// 2x4c2 XOP ld128 ukernel at m = 2, k = 8 for n in 1..4, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10218
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for each k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10233
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for each k in 1..7, with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10249
// 2x4c2 XOP ld128 ukernel for each k in 1..7, n in 1..4 and m in 1..2,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10269
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for each k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10284
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for each k in 9..15, with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10300
// 2x4c2 XOP ld128 ukernel for each k in 9..15, n in 1..4 and m in 1..2,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10320
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for k = 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10335
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10351
// 2x4c2 XOP ld128 ukernel for k = 16, 24, ..., 80, n in 1..4 and m in 1..2,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10371
// 2x4c2 XOP ld128 ukernel at m = 2 for n in 5..7 and k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10388
// 2x4c2 XOP ld128 ukernel at m = 2 for n in 5..7 and k = 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10406
// 2x4c2 XOP ld128 ukernel at m = 2 for n in 5..7 and k = 1, 10, 19, 28, 37,
// with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10424
// 2x4c2 XOP ld128 ukernel for n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..2,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10444
// 2x4c2 XOP ld128 ukernel at m = 2 for n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10461
// 2x4c2 XOP ld128 ukernel at m = 2 for n = 8, 12 and k = 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10479
// 2x4c2 XOP ld128 ukernel at m = 2 for n = 8, 12 and k = 1, 10, 19, 28, 37,
// with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10497
// 2x4c2 XOP ld128 ukernel for n = 8, 12, k = 1, 10, 19, 28, 37 and m in 1..2,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10517
// 2x4c2 XOP ld128 ukernel for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..2,
// with cm_stride set to 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10538
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10552
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10566
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10580
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for k = 1, 10, 19, 28, 37,
// with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10596
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for k = 1, 10, 19, 28, 37,
// with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10612
// 2x4c2 XOP ld128 ukernel at m = 2, n = 4 for k = 1, 10, 19, 28, 37,
// with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10629 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10630
10631
10632 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 AVX ld128 ukernel at m = 3, n = 4, k = 8 with no further settings.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10645
// 3x4c2 AVX ld128 ukernel at m = 3, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10659
// 3x4c2 AVX ld128 ukernel at m = 3, n = 4, k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
10673
// 3x4c2 AVX ld128 ukernel at k = 8 for every (m, n) with n in 1..4 and m in 1..3,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10691
// 3x4c2 AVX ld128 ukernel at n = 4, k = 8 for m in 1..3, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10707
// 3x4c2 AVX ld128 ukernel at m = 3, k = 8 for n in 1..4, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10723
// 3x4c2 AVX ld128 ukernel at m = 3, n = 4 for each k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10738
// 3x4c2 AVX ld128 ukernel at m = 3, n = 4 for each k in 1..7, with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(depth)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
10754
// 3x4c2 AVX ld128 ukernel for each k in 1..7, n in 1..4 and m in 1..3,
// with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
10774
// Full 3x4 tile for each k in [9, 16).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10789
// Full 3x4 tile for each k in [9, 16), with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10805
// Every (m, n) with m in [1, 3] and n in [1, 4], for each k in [9, 16);
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
10825
// Full 3x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10840
// Full 3x4 tile for k = 16, 24, ..., 80 (step 8), with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10856
// Every (m, n) with m in [1, 3] and n in [1, 4], for k = 16, 24, ..., 80;
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
10876
// m == mr (3) with n in [5, 8), for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
10893
// m == mr (3) with n in [5, 8), for k = 1, 10, 19, 28, 37, with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
10911
// m == mr (3) with n in [5, 8), for k = 1, 10, 19, 28, 37, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
10929
// n in [5, 8), k = 1, 10, 19, 28, 37, and every m in [1, 3];
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
10949
// m == mr (3) with n = 8 and 12, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
10966
// m == mr (3) with n = 8 and 12, for k = 1, 10, 19, 28, 37, with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
10984
// m == mr (3) with n = 8 and 12, for k = 1, 10, 19, 28, 37, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11002
// n = 8 and 12, k = 1, 10, 19, 28, 37, and every m in [1, 3];
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11022
// k = 1, 10, 19, 28, 37 with every n in [1, 4] and m in [1, 3], using
// cm_stride(7); iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11043
// Full 3x4 tile at k == 8 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11057
// Full 3x4 tile at k == 8 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11071
// Full 3x4 tile at k == 8 with cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11085
// Full 3x4 tile for k = 1, 10, 19, 28, 37 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11101
// Full 3x4 tile for k = 1, 10, 19, 28, 37 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11117
// Full 3x4 tile for k = 1, 10, 19, 28, 37 with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11134 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11135
11136
11137 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile (m == mr, n == nr) at k == 8.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11150
// Full 4x4 tile at k == 8 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11164
// Full 4x4 tile at k == 8 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11178
// k == 8 with every n in [1, 4] and m in [1, 4]; iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11196
// n == nr (4) and k == 8, sweeping m over [1, 4]; iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11212
// m == mr (4) and k == 8, sweeping n over [1, 4]; iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11228
// Full 4x4 tile for each k in [1, 8).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11243
// Full 4x4 tile for each k in [1, 8), with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11259
// Every (m, n) with m in [1, 4] and n in [1, 4], for each k in [1, 8);
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11279
// Full 4x4 tile for each k in [9, 16).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11294
// Full 4x4 tile for each k in [9, 16), with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11310
// Every (m, n) with m in [1, 4] and n in [1, 4], for each k in [9, 16);
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11330
// Full 4x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11345
// Full 4x4 tile for k = 16, 24, ..., 80 (step 8), with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11361
// Every (m, n) with m in [1, 4] and n in [1, 4], for k = 16, 24, ..., 80;
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11381
// m == mr (4) with n in [5, 8), for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11398
// m == mr (4) with n in [5, 8), for k = 1, 10, 19, 28, 37, with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11416
// m == mr (4) with n in [5, 8), for k = 1, 10, 19, 28, 37, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11434
// n in [5, 8), k = 1, 10, 19, 28, 37, and every m in [1, 4];
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11454
// m == mr (4) with n = 8 and 12, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11471
// m == mr (4) with n = 8 and 12, for k = 1, 10, 19, 28, 37, with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11489
// m == mr (4) with n = 8 and 12, for k = 1, 10, 19, 28, 37, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11507
// n = 8 and 12, k = 1, 10, 19, 28, 37, and every m in [1, 4];
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11527
// k = 1, 10, 19, 28, 37 with every n in [1, 4] and m in [1, 4], using
// cm_stride(7); iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11548
// Full 4x4 tile at k == 8 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11562
// Full 4x4 tile at k == 8 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11576
// Full 4x4 tile at k == 8 with cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11590
// Full 4x4 tile for k = 1, 10, 19, 28, 37 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11606
// Full 4x4 tile for k = 1, 10, 19, 28, 37 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11622
// Full 4x4 tile for k = 1, 10, 19, 28, 37 with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11639 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11640
11641
11642 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile (m == mr, n == nr) at k == 8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11655
// Full 2x4 tile at k == 8 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11669
// Full 2x4 tile at k == 8 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11683
// k == 8 with every n in [1, 4] and m in [1, 2]; iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11701
// n == nr (4) and k == 8, sweeping m over [1, 2]; iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11717
// m == mr (2) and k == 8, sweeping n over [1, 4]; iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11733
// Full 2x4 tile for each k in [1, 8).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11748
// Full 2x4 tile for each k in [1, 8), with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11764
// Every (m, n) with m in [1, 2] and n in [1, 4], for each k in [1, 8);
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11784
// Full 2x4 tile for each k in [9, 16).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11799
// Full 2x4 tile for each k in [9, 16), with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11815
// Every (m, n) with m in [1, 2] and n in [1, 4], for each k in [9, 16);
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11835
// Sweeps k over multiples of 8 from 16 through 80 on a full 2x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
11850
// Sweeps k over multiples of 8 from 16 through 80 on a full 2x4 tile with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .a_stride(83);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
11866
// Sweeps k over multiples of 8 (16..80) and every (m, n) with m <= 2 and n <= 4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
11886
// Sweeps n in [5, 7] and k in {1, 10, ..., 37} with m fixed at 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(kc);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
11903
// Sweeps n in [5, 7] and k in {1, 10, ..., 37} with m fixed at 2 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(kc)
        .cn_stride(7);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
11921
// Sweeps n in [5, 7] and k in {1, 10, ..., 37} with m fixed at 2 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(kc)
        .a_stride(43);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
11939
// Sweeps n in [5, 7], k in {1, 10, ..., 37} and m in [1, 2], iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
11959
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} with m fixed at 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(kc);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
11976
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} with m fixed at 2 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(kc)
        .cn_stride(7);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
11994
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} with m fixed at 2 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(kc)
        .a_stride(43);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12012
// Sweeps n in {8, 12}, k in {1, 10, ..., 37} and m in [1, 2], iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12032
// Sweeps k in {1, 10, ..., 37} over every (m, n) with m <= 2 and n <= 4,
// with cm_stride set to 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .cm_stride(7)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12053
// Full 2x4 tile at k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12067
// Full 2x4 tile at k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12081
// Full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12095
// Sweeps k in {1, 10, ..., 37} on a full 2x4 tile with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .a_zero_point(0);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12111
// Sweeps k in {1, 10, ..., 37} on a full 2x4 tile with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .b_zero_point(0);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12127
// Sweeps k in {1, 10, ..., 37} on a full 2x4 tile with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12144 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12145
12146
12147 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12160
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12174
// Full 3x4 tile at k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .a_stride(11);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12188
// k = 8 over every (m, n) with m <= 3 and n <= 4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(rows).n(cols).k(8)
        .iterations(1);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12206
// k = 8 and n = 4 while m sweeps [1, 3], iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(rows).n(4).k(8)
      .iterations(1);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12222
// k = 8 and m = 3 while n sweeps [1, 4], iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(cols).k(8)
      .iterations(1);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12238
// Sweeps k in [1, 7] on a full 3x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12253
// Sweeps k in [1, 7] on a full 3x4 tile with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_stride(11);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12269
// Sweeps k in [1, 7] over every (m, n) with m <= 3 and n <= 4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12289
// Sweeps k in [9, 15] on a full 3x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12304
// Sweeps k in [9, 15] on a full 3x4 tile with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_stride(19);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12320
// Sweeps k in [9, 15] over every (m, n) with m <= 3 and n <= 4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12340
// Sweeps k over multiples of 8 from 16 through 80 on a full 3x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12355
// Sweeps k over multiples of 8 from 16 through 80 on a full 3x4 tile with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_stride(83);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12371
// Sweeps k over multiples of 8 (16..80) and every (m, n) with m <= 3 and n <= 4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12391
// Sweeps n in [5, 7] and k in {1, 10, ..., 37} with m fixed at 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(kc);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12408
// Sweeps n in [5, 7] and k in {1, 10, ..., 37} with m fixed at 3 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(kc)
        .cn_stride(7);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12426
// Sweeps n in [5, 7] and k in {1, 10, ..., 37} with m fixed at 3 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(kc)
        .a_stride(43);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12444
// Sweeps n in [5, 7], k in {1, 10, ..., 37} and m in [1, 3], iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12464
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} with m fixed at 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(kc);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12481
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} with m fixed at 3 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(kc)
        .cn_stride(7);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12499
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} with m fixed at 3 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(kc)
        .a_stride(43);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12517
// Sweeps n in {8, 12}, k in {1, 10, ..., 37} and m in [1, 3], iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12537
// Sweeps k in {1, 10, ..., 37} over every (m, n) with m <= 3 and n <= 4,
// with cm_stride set to 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .cm_stride(7)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12558
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12572
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12586
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12600
// Sweeps k in {1, 10, ..., 37} on a full 3x4 tile with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_zero_point(0);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12616
// Sweeps k in {1, 10, ..., 37} on a full 3x4 tile with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .b_zero_point(0);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12632
// Sweeps k in {1, 10, ..., 37} on a full 3x4 tile with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12649 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12650
12651
12652 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12665
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12679
// Full 3x4 tile at k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .a_stride(11);
  tester.Test(
    xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
12693
// k = 8 over every (m, n) with m <= 3 and n <= 4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(rows).n(cols).k(8)
        .iterations(1);
      tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
12711
// k = 8 and n = 4 while m sweeps [1, 3], iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(rows).n(4).k(8)
      .iterations(1);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12727
// k = 8 and m = 3 while n sweeps [1, 4], iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(cols).k(8)
      .iterations(1);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12743
// Sweeps k in [1, 7] on a full 3x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12758
// Sweeps k in [1, 7] on a full 3x4 tile with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_stride(11);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12774
// Sweeps k in [1, 7] over every (m, n) with m <= 3 and n <= 4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12794
// Sweeps k in [9, 15] on a full 3x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12809
// Sweeps k in [9, 15] on a full 3x4 tile with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_stride(19);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12825
// Sweeps k in [9, 15] over every (m, n) with m <= 3 and n <= 4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(kc)
          .iterations(1);
        tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
12845
// Sweeps k over multiples of 8 from 16 through 80 on a full 3x4 tile.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12860
// Sweeps k over multiples of 8 from 16 through 80 on a full 3x4 tile with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_stride(83);
    tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
12876
// Sweeps k = 16, 24, ..., 80 against every n in [1, 4] and m in [1, 3];
// iterations is set to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12896
// n in [5, 8) (larger than nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12913
// n in [5, 8) with m = 3, for k = 1, 10, 19, 28, 37, with cn_stride(7).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12931
// n in [5, 8) with m = 3, for k = 1, 10, 19, 28, 37, with a_stride(43).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12949
// n in [5, 8), k = 1, 10, 19, 28, 37 and every m in [1, 3]; iterations is set
// to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12969
// n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12986
// n = 8 and 12 with m = 3, for k = 1, 10, 19, 28, 37, with cn_stride(7).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13004
// n = 8 and 12 with m = 3, for k = 1, 10, 19, 28, 37, with a_stride(43).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13022
// n = 8 and 12, k = 1, 10, 19, 28, 37 and every m in [1, 3]; iterations is
// set to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13042
// k = 1, 10, 19, 28, 37 against every n in [1, 4] and m in [1, 3], with
// cm_stride(7); iterations is set to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13063
// Full 3x4 tile with k = 8 and qmin(128).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13077
// Full 3x4 tile with k = 8 and qmax(128).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13091
// Full 3x4 tile with k = 8 and cm_stride(7).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13105
// Full 3x4 tile for k = 1, 10, 19, 28, 37, with a_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13121
// Full 3x4 tile for k = 1, 10, 19, 28, 37, with b_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13137
// Full 3x4 tile for k = 1, 10, 19, 28, 37, with both a_zero_point(0) and
// b_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE41 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13154 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13155
13156
13157 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile (m == mr, n == nr) with k = 8.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13170
// Full 4x4 tile with k = 8 and cn_stride(7).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13184
// Full 4x4 tile with k = 8 and a_stride(11).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .a_stride(11)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13198
// Sweeps every n in [1, 4] and m in [1, 4] at k = 8; iterations is set to 1
// for each combination.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13216
// Sweeps m in [1, 4] with n = 4 and k = 8; iterations is set to 1.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13232
// Sweeps n in [1, 4] with m = 4 and k = 8; iterations is set to 1.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13248
// Full 4x4 tile (m == mr, n == nr) for every k in [1, 8).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13263
// Full 4x4 tile for every k in [1, 8), with a_stride(11).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13279
// Sweeps every k in [1, 8), n in [1, 4] and m in [1, 4]; iterations is set to
// 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13299
// Full 4x4 tile (m == mr, n == nr) for every k in [9, 16).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13314
// Full 4x4 tile for every k in [9, 16), with a_stride(19).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_stride(19)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13330
// Sweeps every k in [9, 16), n in [1, 4] and m in [1, 4]; iterations is set
// to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13350
// Full 4x4 tile (m == mr, n == nr) for k = 16, 24, ..., 80 (multiples of 8).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13365
// Full 4x4 tile for k = 16, 24, ..., 80, with a_stride(83).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_stride(83)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13381
// Sweeps k = 16, 24, ..., 80 against every n in [1, 4] and m in [1, 4];
// iterations is set to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13401
// n in [5, 8) (larger than nr = 4) with m = 4, for k = 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13418
// n in [5, 8) with m = 4, for k = 1, 10, 19, 28, 37, with cn_stride(7).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13436
// n in [5, 8) with m = 4, for k = 1, 10, 19, 28, 37, with a_stride(43).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13454
// n in [5, 8), k = 1, 10, 19, 28, 37 and every m in [1, 4]; iterations is set
// to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13474
// n = 8 and 12 (multiples of nr = 4) with m = 4, for k = 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13491
// n = 8 and 12 with m = 4, for k = 1, 10, 19, 28, 37, with cn_stride(7).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13509
// n = 8 and 12 with m = 4, for k = 1, 10, 19, 28, 37, with a_stride(43).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13527
// n = 8 and 12, k = 1, 10, 19, 28, 37 and every m in [1, 4]; iterations is
// set to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13547
// k = 1, 10, 19, 28, 37 against every n in [1, 4] and m in [1, 4], with
// cm_stride(7); iterations is set to 1 for each combination.
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13568
// Full 4x4 tile with k = 8 and qmin(128).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13582
// Full 4x4 tile with k = 8 and qmax(128).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13596
// Full 4x4 tile with k = 8 and cm_stride(7).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13610
// Full 4x4 tile for k = 1, 10, 19, 28, 37, with a_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13626
// Full 4x4 tile for k = 1, 10, 19, 28, 37, with b_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13642
// Full 4x4 tile for k = 1, 10, 19, 28, 37, with both a_zero_point(0) and
// b_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE2 (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13659 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13660
13661
13662 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile (m == mr, n == nr) with k = 8.
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13675
// Full 3x4 tile with k = 8 and cn_stride(7).
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13689
// Full 3x4 tile with k = 8 and a_stride(11).
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .a_stride(11)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13703
// Sweeps every n in [1, 4] and m in [1, 3] at k = 8; iterations is set to 1
// for each combination.
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13721
// Sweeps m in [1, 3] with n = 4 and k = 8; iterations is set to 1.
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13737
// Sweeps n in [1, 4] with m = 3 and k = 8; iterations is set to 1.
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13753
// Full 3x4 tile (m == mr, n == nr) for every k in [1, 8).
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13768
// Full 3x4 tile for every k in [1, 8), with a_stride(11).
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13784
// Sweeps every k in [1, 8), n in [1, 4] and m in [1, 3]; iterations is set to
// 1 for each combination.
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13804
// Full 3x4 tile (m == mr, n == nr) for every k in [9, 16).
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13819
// Full 3x4 tile for every k in [9, 16), with a_stride(19).
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_stride(19)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13835
// Sweeps every k in [9, 16), n in [1, 4] and m in [1, 3]; iterations is set
// to 1 for each combination.
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13855
// Full 3x4 tile (m == mr, n == nr) for k = 16, 24, ..., 80 (multiples of 8).
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13870
// Full 3x4 tile for k = 16, 24, ..., 80, with a_stride(83).
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_stride(83)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13886
// Sweeps k = 16, 24, ..., 80 against every n in [1, 4] and m in [1, 3];
// iterations is set to 1 for each combination.
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13906
// n in [5, 8) (larger than nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_X86_AVX (defined elsewhere; presumably an ISA-availability check).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13923
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13941
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  // n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, with a_stride set to 43
  // (above the largest k tested, 37).
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13959
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, and every m from 1 to 3;
  // one iteration per configuration.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13979
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  // n takes 8 and 12 (multiples of nr = 4); k takes 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13996
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14014
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12} and k in {1, 10, 19, 28, 37}, with a_stride set to 43.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14032
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, and every m from 1 to 3;
  // one iteration per configuration.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14052
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in {1, 10, 19, 28, 37} and every (m, n) up to 3x4, with cm_stride set
  // to 7; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14073
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Single 3x4 case at k = 8 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14087
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Single 3x4 case at k = 8 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14101
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // Single 3x4 case at k = 8 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14115
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // 3x4 case with a_zero_point forced to 0; k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14131
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // 3x4 case with b_zero_point forced to 0; k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14147
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // 3x4 case with both a_zero_point and b_zero_point forced to 0;
  // k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14164 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14165
14166
14167 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single case: m = 3, n = 4 (matching mr and nr) with k = 8.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14180
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Single 3x4 case at k = 8 with cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14194
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // Single 3x4 case at k = 8 with a_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14208
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with 1 <= m <= 3 and 1 <= n <= 4 at k = 8;
  // one iteration per configuration.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14226
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // m varies from 1 to 3 while n = 4 and k = 8; one iteration each.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14242
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // n varies from 1 to 4 while m = 3 and k = 8; one iteration each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14258
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // 3x4 case with k covering every value from 1 to 7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14273
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // 3x4 case with k from 1 to 7 and a_stride set to 11.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14289
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k from 1 to 7 crossed with every (m, n) up to 3x4;
  // one iteration per configuration.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14309
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // 3x4 case with k covering every value from 9 to 15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14324
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // 3x4 case with k from 9 to 15 and a_stride set to 19.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14340
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k from 9 to 15 crossed with every (m, n) up to 3x4;
  // one iteration per configuration.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14360
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // m and n match mr and nr (3 and 4); k visits every multiple of 8 in [16, 80].
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14375
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // Same k sweep as k_div_8, with a_stride set to 83 (above the largest k, 80).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14391
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with 1 <= m <= 3 and 1 <= n <= 4, for each multiple-of-8 k
  // in [16, 80]; one iteration per configuration.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14411
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n takes 5, 6, 7 (all above nr = 4); k takes 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14428
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14446
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, with a_stride set to 43
  // (above the largest k tested, 37).
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14464
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, and every m from 1 to 3;
  // one iteration per configuration.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14484
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n takes 8 and 12 (multiples of nr = 4); k takes 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14501
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14519
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12} and k in {1, 10, 19, 28, 37}, with a_stride set to 43.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14537
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, and every m from 1 to 3;
  // one iteration per configuration.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14557
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k in {1, 10, 19, 28, 37} and every (m, n) up to 3x4, with cm_stride set
  // to 7; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14578
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Single 3x4 case at k = 8 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14592
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Single 3x4 case at k = 8 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14606
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Single 3x4 case at k = 8 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14620
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // 3x4 case with a_zero_point forced to 0; k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14636
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // 3x4 case with b_zero_point forced to 0; k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14652
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // 3x4 case with both a_zero_point and b_zero_point forced to 0;
  // k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14669 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14670
14671
14672 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single case: m = 1, n = 4 (matching mr and nr) with k = 8.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14685
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single 1x4 case at k = 8 with cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14699
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Single 1x4 case at k = 8 with a_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14713
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n varies from 1 to 4 at k = 8; the m loop covers only m = 1 because
  // mr is 1. One iteration per configuration.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14731
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // The m loop covers only m = 1 (mr is 1), with n = 4 and k = 8;
  // one iteration.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14747
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // n varies from 1 to 4 while m = 1 and k = 8; one iteration each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14763
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // 1x4 case with k covering every value from 1 to 7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14778
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // 1x4 case with k from 1 to 7 and a_stride set to 11.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14794
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k from 1 to 7 crossed with n from 1 to 4; the m loop covers only m = 1.
  // One iteration per configuration.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14814
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // 1x4 case with k covering every value from 9 to 15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14829
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // 1x4 case with k from 9 to 15 and a_stride set to 19.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14845
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k from 9 to 15 crossed with n from 1 to 4; the m loop covers only m = 1.
  // One iteration per configuration.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14865
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // m and n match mr and nr (1 and 4); k visits every multiple of 8 in [16, 80].
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14880
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Same k sweep as k_div_8, with a_stride set to 83 (above the largest k, 80).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14896
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Multiples of 8 for k in [16, 80] crossed with n from 1 to 4; the m loop
  // covers only m = 1. One iteration per configuration.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
14916
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n takes 5, 6, 7 (all above nr = 4); k takes 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14933
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14951
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // n in {5, 6, 7} and k in {1, 10, 19, 28, 37}, with a_stride set to 43
  // (above the largest k tested, 37).
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14969
// Sweeps n = 5..7, k = 1, 10, ..., 37 and m = 1, running one tester iteration
// per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14989
// Sweeps n = 8 and 12 (multiples of nr = 4) and k = 1, 10, ..., 37 with m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15006
// Sweeps n = 8 and 12 and k = 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15024
// Sweeps n = 8 and 12 and k = 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15042
// Sweeps n = 8 and 12, k = 1, 10, ..., 37 and m = 1, running one tester
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15062
// Sweeps k = 1, 10, ..., 37, n = 1..4 and m = 1 with cm_stride set to 7,
// running one tester iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15083
// Single m = 1, n = 4, k = 8 configuration with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15097
// Single m = 1, n = 4, k = 8 configuration with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15111
// Single m = 1, n = 4, k = 8 configuration with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15125
// Sweeps k = 1, 10, ..., 37 at m = 1, n = 4 with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15141
// Sweeps k = 1, 10, ..., 37 at m = 1, n = 4 with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15157
// Sweeps k = 1, 10, ..., 37 at m = 1, n = 4 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15174 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15175
15176
15177 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile configuration: m = 4, n = 4, k = 8.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15190
// Single m = 4, n = 4, k = 8 configuration with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15204
// Single m = 4, n = 4, k = 8 configuration with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15218
// Sweeps every (n, m) in 1..4 x 1..4 at k = 8, running one tester iteration
// per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15236
// Sweeps m = 1..4 at n = 4, k = 8, running one tester iteration per value.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15252
// Sweeps n = 1..4 at m = 4, k = 8, running one tester iteration per value.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15268
// Sweeps k = 1..7 at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15283
// Sweeps k = 1..7 at m = 4, n = 4 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15299
// Sweeps k = 1..7 and every (n, m) in 1..4 x 1..4, running one tester
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15319
// Sweeps k = 9..15 at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15334
// Sweeps k = 9..15 at m = 4, n = 4 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15350
// Sweeps k = 9..15 and every (n, m) in 1..4 x 1..4, running one tester
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15370
// Sweeps k = 16, 24, ..., 80 at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15385
// Sweeps k = 16, 24, ..., 80 at m = 4, n = 4 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15401
// Sweeps k = 16, 24, ..., 80 and every (n, m) in 1..4 x 1..4, running one
// tester iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15421
// Sweeps n = 5..7 (above nr = 4) and k = 1, 10, ..., 37 with m = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15438
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 4 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15456
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 4 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15474
// Sweeps n = 5..7, k = 1, 10, ..., 37 and m = 1..4, running one tester
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15494
// Sweeps n = 8 and 12 (multiples of nr = 4) and k = 1, 10, ..., 37 with m = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15511
// Sweeps n = 8 and 12 and k = 1, 10, ..., 37 with m = 4 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15529
// Sweeps n = 8 and 12 and k = 1, 10, ..., 37 with m = 4 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15547
// Sweeps n = 8 and 12, k = 1, 10, ..., 37 and m = 1..4, running one tester
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15567
// Sweeps k = 1, 10, ..., 37 and every (n, m) in 1..4 x 1..4 with cm_stride set
// to 7, running one tester iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15588
// Single m = 4, n = 4, k = 8 configuration with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15602
// Single m = 4, n = 4, k = 8 configuration with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15616
// Single m = 4, n = 4, k = 8 configuration with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15630
// Sweeps k = 1, 10, ..., 37 at m = 4, n = 4 with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15646
// Sweeps k = 1, 10, ..., 37 at m = 4, n = 4 with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15662
// Sweeps k = 1, 10, ..., 37 at m = 4, n = 4 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15679 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15680
15681
15682 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile configuration: m = 4, n = 4, k = 8.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15695
// Single m = 4, n = 4, k = 8 configuration with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15709
// Single m = 4, n = 4, k = 8 configuration with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15723
// Sweeps every (n, m) in 1..4 x 1..4 at k = 8, running one tester iteration
// per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15741
// Sweeps m = 1..4 at n = 4, k = 8, running one tester iteration per value.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15757
// Sweeps n = 1..4 at m = 4, k = 8, running one tester iteration per value.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15773
// Sweeps k = 1..7 at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15788
// Sweeps k = 1..7 at m = 4, n = 4 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15804
// Sweeps k = 1..7 and every (n, m) in 1..4 x 1..4, running one tester
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15824
// Sweeps k = 9..15 at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15839
// Sweeps k = 9..15 at m = 4, n = 4 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15855
// Sweeps k = 9..15 and every (n, m) in 1..4 x 1..4, running one tester
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15875
// Sweeps k = 16, 24, ..., 80 at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15890
// Sweeps k = 16, 24, ..., 80 at m = 4, n = 4 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15906
// Sweeps k = 16, 24, ..., 80 and every (n, m) in 1..4 x 1..4, running one
// tester iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15926
// Sweeps n = 5..7 (above nr = 4) and k = 1, 10, ..., 37 with m = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15943
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 4 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15961
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 4 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15979
// Sweeps n = 5..7, k = 1, 10, ..., 37 and m = 1..4, running one tester
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15999
// 4x4c2s4 SSE4.1 kernel: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, M = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16016
// 4x4c2s4 SSE4.1 kernel: N in {8, 12}, K in {1, 10, 19, 28, 37}, M = 4,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16034
// 4x4c2s4 SSE4.1 kernel: N in {8, 12}, K in {1, 10, 19, 28, 37}, M = 4,
// with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16052
// 4x4c2s4 SSE4.1 kernel: N in {8, 12}, K in {1, 10, 19, 28, 37}, and every
// M in 1..4; each configuration runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16072
// 4x4c2s4 SSE4.1 kernel: K in {1, 10, 19, 28, 37}, every N in 1..4 and
// M in 1..4, with cm_stride set to 7 and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16093
// 4x4c2s4 SSE4.1 kernel: single 4x4 problem with K = 8 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16107
// 4x4c2s4 SSE4.1 kernel: single 4x4 problem with K = 8 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16121
// 4x4c2s4 SSE4.1 kernel: single 4x4 problem with K = 8 and cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16135
// 4x4c2s4 SSE4.1 kernel: 4x4 problem, K in {1, 10, 19, 28, 37},
// with a_zero_point forced to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16151
// 4x4c2s4 SSE4.1 kernel: 4x4 problem, K in {1, 10, 19, 28, 37},
// with b_zero_point forced to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16167
// 4x4c2s4 SSE4.1 kernel: 4x4 problem, K in {1, 10, 19, 28, 37},
// with both a_zero_point and b_zero_point forced to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16184 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16185
16186
16187 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2s4 XOP kernel: single 1x4 problem with K = 8.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16200
// 1x4c2s4 XOP kernel: single 1x4 problem with K = 8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16214
// 1x4c2s4 XOP kernel: single 1x4 problem with K = 8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16228
// 1x4c2s4 XOP kernel: K = 8 with every N in 1..4 and M in 1..1
// (the M loop has a single iteration because mr = 1); iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16246
// 1x4c2s4 XOP kernel: K = 8, N = 4, M swept over 1..1 (single value since
// mr = 1); iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16262
// 1x4c2s4 XOP kernel: K = 8, M = 1, N swept over 1..4; iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16278
// 1x4c2s4 XOP kernel: 1x4 problem with every K in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16293
// 1x4c2s4 XOP kernel: 1x4 problem with every K in 1..7 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16309
// 1x4c2s4 XOP kernel: every K in 1..7, N in 1..4 and M in 1..1;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16329
// 1x4c2s4 XOP kernel: 1x4 problem with every K in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16344
// 1x4c2s4 XOP kernel: 1x4 problem with every K in 9..15 and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16360
// 1x4c2s4 XOP kernel: every K in 9..15, N in 1..4 and M in 1..1;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16380
// 1x4c2s4 XOP kernel: 1x4 problem with K = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16395
// 1x4c2s4 XOP kernel: 1x4 problem with K = 16, 24, ..., 80 and a_stride
// set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16411
// 1x4c2s4 XOP kernel: K = 16, 24, ..., 80 with every N in 1..4 and M in 1..1;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16431
// 1x4c2s4 XOP kernel: N in {5, 6, 7} (above nr = 4), K in {1, 10, 19, 28, 37},
// with M fixed at 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16448
// 1x4c2s4 XOP kernel: N in {5, 6, 7}, K in {1, 10, 19, 28, 37}, M = 1,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16466
// 1x4c2s4 XOP kernel: N in {5, 6, 7}, K in {1, 10, 19, 28, 37}, M = 1,
// with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16484
// 1x4c2s4 XOP kernel: N in {5, 6, 7}, K in {1, 10, 19, 28, 37}, M in 1..1;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16504
// 1x4c2s4 XOP kernel: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, M = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16521
// 1x4c2s4 XOP kernel: N in {8, 12}, K in {1, 10, 19, 28, 37}, M = 1,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16539
// 1x4c2s4 XOP kernel: N in {8, 12}, K in {1, 10, 19, 28, 37}, M = 1,
// with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16557
// 1x4c2s4 XOP kernel: N in {8, 12}, K in {1, 10, 19, 28, 37}, M in 1..1;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16577
// 1x4c2s4 XOP kernel: K in {1, 10, 19, 28, 37}, every N in 1..4 and M in 1..1,
// with cm_stride set to 7 and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16598
// 1x4c2s4 XOP kernel: single 1x4 problem with K = 8 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16612
// 1x4c2s4 XOP kernel: single 1x4 problem with K = 8 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16626
// 1x4c2s4 XOP kernel: single 1x4 problem with K = 8 and cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16640
// 1x4c2s4 XOP kernel: 1x4 problem, K in {1, 10, 19, 28, 37},
// with a_zero_point forced to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16656
// 1x4c2s4 XOP kernel: 1x4 problem, K in {1, 10, 19, 28, 37},
// with b_zero_point forced to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16672
// 1x4c2s4 XOP kernel: 1x4 problem, K in {1, 10, 19, 28, 37},
// with both a_zero_point and b_zero_point forced to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16689 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16690
16691
16692 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2s4 XOP kernel: single 2x4 problem with K = 8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16705
// 2x4c2s4 XOP kernel: single 2x4 problem with K = 8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16719
// 2x4c2s4 XOP kernel: single 2x4 problem with K = 8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
16733
// 2x4c2s4 XOP kernel: K = 8 with every N in 1..4 and M in 1..2;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16751
// 2x4c2s4 XOP kernel: K = 8, N = 4, M swept over 1..2; iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16767
// 2x4c2s4 XOP kernel: K = 8, M = 2, N swept over 1..4; iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16783
// 2x4c2s4 XOP kernel: 2x4 problem with every K in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16798
// 2x4c2s4 XOP kernel: 2x4 problem with every K in 1..7 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16814
// 2x4c2s4 XOP kernel: every K in 1..7, N in 1..4 and M in 1..2;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16834
// 2x4c2s4 XOP kernel: 2x4 problem with every K in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16849
// 2x4c2s4 XOP kernel: 2x4 problem with every K in 9..15 and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16865
// 2x4c2s4 XOP kernel: every K in 9..15, N in 1..4 and M in 1..2;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16885
// 2x4c2s4 XOP kernel: 2x4 problem with K = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16900
// 2x4c2s4 XOP kernel: 2x4 problem with K = 16, 24, ..., 80 and a_stride
// set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16916
// 2x4c2s4 XOP kernel: K = 16, 24, ..., 80 with every N in 1..4 and M in 1..2;
// iterations(1) per run.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16936
// 2x4c2s4 XOP kernel: N in {5, 6, 7} (above nr = 4), K in {1, 10, 19, 28, 37},
// with M fixed at 2 (equal to mr).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16953
// 2x4c2s4 XOP kernel: N in {5, 6, 7}, K in {1, 10, 19, 28, 37}, M = 2,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16971
// 2x4c2s4 XOP kernel: N in {5, 6, 7}, K in {1, 10, 19, 28, 37}, M = 2,
// with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16989
// 2x4c2s4 XOP kernel: N in {5, 6, 7}, K in {1, 10, 19, 28, 37}, and every
// M in 1..2; each configuration runs with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
17009
// 2x4c2s4 XOP kernel: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, M = 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17026
// 2x4c2s4 XOP kernel: N in {8, 12}, K in {1, 10, 19, 28, 37}, M = 2,
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17044
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, full m = 2, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17062
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, every m in 1..2; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17082
// k = 1, 10, 19, 28, 37, every n in 1..4 and m in 1..2, with cm_stride(7); one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17103
// Single 2x4 case with k = 8 and qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17117
// Single 2x4 case with k = 8 and qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17131
// Single 2x4 case with k = 8 and cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17145
// Full 2x4 case for k = 1, 10, 19, 28, 37 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(cur_k)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17161
// Full 2x4 case for k = 1, 10, 19, 28, 37 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(cur_k)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17177
// Full 2x4 case for k = 1, 10, 19, 28, 37 with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(cur_k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17194 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17195
17196
17197 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 3x4 case with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17210
// Single full 3x4 case with k = 8 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17224
// Single full 3x4 case with k = 8 and a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17238
// k = 8 for every n in 1..4 and m in 1..3; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17256
// k = 8, n = 4, for every m in 1..3; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17272
// k = 8, m = 3, for every n in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17288
// Full 3x4 case for every k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17303
// Full 3x4 case for every k in 1..7 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17319
// Every k in 1..7, n in 1..4 and m in 1..3; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17339
// Full 3x4 case for every k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17354
// Full 3x4 case for every k in 9..15 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17370
// Every k in 9..15, n in 1..4 and m in 1..3; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17390
// Full 3x4 case for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17405
// Full 3x4 case for k = 16, 24, ..., 80 (multiples of 8) with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17421
// k = 16, 24, ..., 80, every n in 1..4 and m in 1..3; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17441
// n = 5..7 (above nr = 4), k = 1, 10, 19, 28, 37, full m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17458
// n = 5..7 (above nr = 4), k = 1, 10, 19, 28, 37, full m = 3, with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17476
// n = 5..7 (above nr = 4), k = 1, 10, 19, 28, 37, full m = 3, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17494
// n = 5..7 (above nr = 4), k = 1, 10, 19, 28, 37, every m in 1..3; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17514
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, full m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17531
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, full m = 3, with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17549
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, full m = 3, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17567
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, every m in 1..3; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17587
// k = 1, 10, 19, 28, 37, every n in 1..4 and m in 1..3, with cm_stride(7); one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17608
// Single full 3x4 case with k = 8 and qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17622
// Single full 3x4 case with k = 8 and qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17636
// Single full 3x4 case with k = 8 and cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17650
// Full 3x4 case for k = 1, 10, 19, 28, 37 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17666
// Full 3x4 case for k = 1, 10, 19, 28, 37 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17682
// Full 3x4 case for k = 1, 10, 19, 28, 37 with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17699 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17700
17701
17702 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 4x4 case with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17715
// Single full 4x4 case with k = 8 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17729
// Single full 4x4 case with k = 8 and a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17743
// k = 8 for every n in 1..4 and m in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17761
// k = 8, n = 4, for every m in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17777
// k = 8, m = 4, for every n in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17793
// Full 4x4 case for every k in 1..7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17808
// Full 4x4 case for every k in 1..7 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17824
// Every k in 1..7, n in 1..4 and m in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17844
// Full 4x4 case for every k in 9..15.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17859
// Full 4x4 case for every k in 9..15 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17875
// Every k in 9..15, n in 1..4 and m in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17895
// Full 4x4 case for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17910
// Full 4x4 case for k = 16, 24, ..., 80 (multiples of 8) with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(cur_k)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17926
// k = 16, 24, ..., 80, every n in 1..4 and m in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17946
// n = 5..7 (above nr = 4), k = 1, 10, 19, 28, 37, full m = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17963
// n = 5..7 (above nr = 4), k = 1, 10, 19, 28, 37, full m = 4, with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17981
// n = 5..7 (above nr = 4), k = 1, 10, 19, 28, 37, full m = 4, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17999
// n = 5..7 (above nr = 4), k = 1, 10, 19, 28, 37, every m in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18019
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, full m = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18036
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, full m = 4, with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18054
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, full m = 4, with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18072
// n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37, every m in 1..4; one iteration per case.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18092
// Sweeps k in {1, 10, ..., 37}, n in 1..4 and m in 1..4 with cm_stride set to 7, one iteration each.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4).cm_stride(7).iterations(1);
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18113
// Single m=4, n=4, k=8 run with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8).qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18127
// Single m=4, n=4, k=8 run with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8).qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18141
// Single m=4, n=4, k=8 run with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18155
// Sweeps k in {1, 10, ..., 37} at m=4, n=4 with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4).m(4).n(4).a_zero_point(0);
  for (size_t k = 1; k <= 40; k += 9) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18171
// Sweeps k in {1, 10, ..., 37} at m=4, n=4 with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4).m(4).n(4).b_zero_point(0);
  for (size_t k = 1; k <= 40; k += 9) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18187
// Sweeps k in {1, 10, ..., 37} at m=4, n=4 with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4).m(4).n(4);
  tester.a_zero_point(0).b_zero_point(0);
  for (size_t k = 1; k <= 40; k += 9) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18204 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18205
18206
18207 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m=1, n=4, k=8 run of the 1x4c8 SSE2 LD64 kernel.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18220
// Single m=1, n=4, k=8 run with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18234
// Single m=1, n=4, k=8 run with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18248
// Sweeps n in 1..4 and m in 1..1 at k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).k(8).iterations(1);
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      tester.m(m).n(n);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18266
// Sweeps m in 1..1 at n=4, k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except m is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).n(4).k(8).iterations(1);
  for (uint32_t m = 1; m <= 1; ++m) {
    tester.m(m);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18282
// Sweeps n in 1..4 at m=1, k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except n is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).k(8).iterations(1);
  for (uint32_t n = 1; n <= 4; ++n) {
    tester.n(n);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18298
// Sweeps k in 1..7 at m=1, n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4);
  for (size_t k = 1; k < 8; ++k) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18313
// Sweeps k in 1..7 at m=1, n=4 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4).a_stride(11);
  for (size_t k = 1; k < 8; ++k) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18329
// Sweeps k in 1..7, n in 1..4 and m in 1..1, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).iterations(1);
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18349
// Sweeps k in 9..15 at m=1, n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4);
  for (size_t k = 9; k < 16; ++k) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18364
// Sweeps k in 9..15 at m=1, n=4 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4).a_stride(19);
  for (size_t k = 9; k < 16; ++k) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18380
// Sweeps k in 9..15, n in 1..4 and m in 1..1, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).iterations(1);
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18400
// Sweeps k over 16, 24, ..., 80 at m=1, n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4);
  for (size_t k = 16; k <= 80; k += 8) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18415
// Sweeps k over 16, 24, ..., 80 at m=1, n=4 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4).a_stride(83);
  for (size_t k = 16; k <= 80; k += 8) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18431
// Sweeps k over 16, 24, ..., 80, n in 1..4 and m in 1..1, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).iterations(1);
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18451
// Sweeps n in 5..7 and k in {1, 10, ..., 37} at m=1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1);
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18468
// Sweeps n in 5..7 and k in {1, 10, ..., 37} at m=1 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).cn_stride(7);
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18486
// Sweeps n in 5..7 and k in {1, 10, ..., 37} at m=1 with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).a_stride(43);
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18504
// Sweeps n in 5..7, k in {1, 10, ..., 37} and m in 1..1, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).iterations(1);
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18524
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} at m=1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1);
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18541
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} at m=1 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).cn_stride(7);
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18559
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} at m=1 with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).a_stride(43);
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18577
// Sweeps n in {8, 12}, k in {1, 10, ..., 37} and m in 1..1, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).iterations(1);
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18597
// Sweeps k in {1, 10, ..., 37}, n in 1..4 and m in 1..1 with cm_stride set to 7, one iteration each.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).cm_stride(7).iterations(1);
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18618
// Single m=1, n=4, k=8 run with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18632
// Single m=1, n=4, k=8 run with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18646
// Single m=1, n=4, k=8 run with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18660
// Sweeps k in {1, 10, ..., 37} at m=1, n=4 with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4).a_zero_point(0);
  for (size_t k = 1; k <= 40; k += 9) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18676
// Sweeps k in {1, 10, ..., 37} at m=1, n=4 with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4).b_zero_point(0);
  for (size_t k = 1; k <= 40; k += 9) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18692
// Sweeps k in {1, 10, ..., 37} at m=1, n=4 with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1).m(1).n(4);
  tester.a_zero_point(0).b_zero_point(0);
  for (size_t k = 1; k <= 40; k += 9) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18709 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18710
18711
18712 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m=2, n=4, k=8 run of the 2x4c8 SSE2 LD64 kernel.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18725
// Single m=2, n=4, k=8 run with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18739
// Single m=2, n=4, k=8 run with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).a_stride(11);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18753
// Sweeps n in 1..4 and m in 1..2 at k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).k(8).iterations(1);
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      tester.m(m).n(n);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18771
// Sweeps m in 1..2 at n=4, k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except m is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).n(4).k(8).iterations(1);
  for (uint32_t m = 1; m <= 2; ++m) {
    tester.m(m);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18787
// Sweeps n in 1..4 at m=2, k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except n is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).k(8).iterations(1);
  for (uint32_t n = 1; n <= 4; ++n) {
    tester.n(n);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18803
// Sweeps k in 1..7 at m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).n(4);
  for (size_t k = 1; k < 8; ++k) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18818
// Sweeps k in 1..7 at m=2, n=4 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).n(4).a_stride(11);
  for (size_t k = 1; k < 8; ++k) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18834
// Sweeps k in 1..7, n in 1..4 and m in 1..2, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).iterations(1);
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18854
// Sweeps k in 9..15 at m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).n(4);
  for (size_t k = 9; k < 16; ++k) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18869
// Sweeps k in 9..15 at m=2, n=4 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).n(4).a_stride(19);
  for (size_t k = 9; k < 16; ++k) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18885
// Sweeps k in 9..15, n in 1..4 and m in 1..2, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).iterations(1);
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18905
// Sweeps k over 16, 24, ..., 80 at m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).n(4);
  for (size_t k = 16; k <= 80; k += 8) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18920
// Sweeps k over 16, 24, ..., 80 at m=2, n=4 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Everything except k is fixed, so it is configured once.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).n(4).a_stride(83);
  for (size_t k = 16; k <= 80; k += 8) {
    tester.k(k);
    tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18936
// Sweeps k over 16, 24, ..., 80, n in 1..4 and m in 1..2, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).iterations(1);
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18956
// Sweeps n in 5..7 and k in {1, 10, ..., 37} at m=2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2);
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18973
// Sweeps n in 5..7 and k in {1, 10, ..., 37} at m=2 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).cn_stride(7);
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18991
// Sweeps n in 5..7 and k in {1, 10, ..., 37} at m=2 with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).a_stride(43);
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19009
// Sweeps n in 5..7, k in {1, 10, ..., 37} and m in 1..2, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).iterations(1);
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19029
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} at m=2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2);
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19046
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} at m=2 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).cn_stride(7);
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19064
// Sweeps n in {8, 12} and k in {1, 10, ..., 37} at m=2 with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).m(2).a_stride(43);
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      tester.n(n).k(k);
      tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19082
// Sweeps n in {8, 12}, k in {1, 10, ..., 37} and m in 1..2, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).iterations(1);
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19102
// Sweeps k in {1, 10, ..., 37}, n in 1..4 and m in 1..2 with cm_stride set to 7, one iteration each.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Settings that do not change between runs are applied once up front.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1).cm_stride(7).iterations(1);
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        tester.m(m).n(n).k(k);
        tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19123
// Single m=2, n=4, k=8 run with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
19137
// Single run at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19151
// Single run at m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19165
// Sweeps k over {1, 10, 19, 28, 37} at m = 2, n = 4 with a_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19181
// Sweeps k over {1, 10, 19, 28, 37} at m = 2, n = 4 with b_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19197
// Sweeps k over {1, 10, 19, 28, 37} at m = 2, n = 4 with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19214 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19215
19216
19217 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 3, n = 4, k = 8 with no additional tester options.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19230
// Single run at m = 3, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19244
// Single run at m = 3, n = 4, k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19258
// Sweeps n over 1..4 and m over 1..3 at k = 8, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19276
// Sweeps m over 1..3 at n = 4, k = 8, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19292
// Sweeps n over 1..4 at m = 3, k = 8, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19308
// Sweeps k over 1..7 at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19323
// Sweeps k over 1..7 at m = 3, n = 4 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19339
// Sweeps k over 1..7, n over 1..4 and m over 1..3, with iterations set
// to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19359
// Sweeps k over 9..15 at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19374
// Sweeps k over 9..15 at m = 3, n = 4 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_stride(19)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19390
// Sweeps k over 9..15, n over 1..4 and m over 1..3, with iterations set
// to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19410
// Sweeps k over the multiples of 8 from 16 through 80 at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19425
// Sweeps k over the multiples of 8 from 16 through 80 at m = 3, n = 4 with
// a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_stride(83)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19441
// Sweeps k over the multiples of 8 from 16 through 80, n over 1..4 and m
// over 1..3, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19461
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19478
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m = 3, with
// cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19496
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m = 3, with
// a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19514
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3, with
// iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19534
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19551
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 3, with
// cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19569
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 3, with
// a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19587
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..3, with
// iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19607
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3, with
// cm_stride set to 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19628
// Single run at m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19642
// Single run at m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19656
// Single run at m = 3, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19670
// Sweeps k over {1, 10, 19, 28, 37} at m = 3, n = 4 with a_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19686
// Sweeps k over {1, 10, 19, 28, 37} at m = 3, n = 4 with b_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19702
// Sweeps k over {1, 10, 19, 28, 37} at m = 3, n = 4 with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19719 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19720
19721
19722 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 2, n = 4, k = 8 with no additional tester options.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19735
// Single run at m = 2, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19749
// Single run at m = 2, n = 4, k = 8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19763
// Sweeps n over 1..4 and m over 1..2 at k = 8, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19781
// Sweeps m over 1..2 at n = 4, k = 8, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19797
// Sweeps n over 1..4 at m = 2, k = 8, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19813
// Sweeps k over 1..7 at m = 2, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19828
// Sweeps k over 1..7 at m = 2, n = 4 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19844
// Sweeps k over 1..7, n over 1..4 and m over 1..2, with iterations set
// to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19864
// Sweeps k over 9..15 at m = 2, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19879
// Sweeps k over 9..15 at m = 2, n = 4 with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .a_stride(19)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19895
// Sweeps k over 9..15, n over 1..4 and m over 1..2, with iterations set
// to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19915
// Sweeps k over the multiples of 8 from 16 through 80 at m = 2, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19930
// Sweeps k over the multiples of 8 from 16 through 80 at m = 2, n = 4 with
// a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .a_stride(83)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19946
// Sweeps k over the multiples of 8 from 16 through 80, n over 1..4 and m
// over 1..2, with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19966
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m = 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19983
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m = 2, with
// cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20001
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m = 2, with
// a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20019
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2, with
// iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20039
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20056
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2, with
// cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20074
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 2, with
// a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20092
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2, with
// iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20112
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2, with
// cm_stride set to 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20133
// Single run at m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20147
// Single run at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20161
// Single run at m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20175
// Sweeps k over {1, 10, 19, 28, 37} at m = 2, n = 4 with a_zero_point set
// to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20191
// Full 2x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20207
// Full 2x4 tile with both a_zero_point and b_zero_point forced to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20224 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20225
20226
20227 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full 2x4 tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20240
// Full 2x4 tile at k = 8 with cn_stride set to 7 (larger than nr = 4).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20254
// Full 2x4 tile at k = 8 with a_stride set to 11 (larger than k).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20268
// Every (m, n) sub-tile with m in [1, 2] and n in [1, 4] at k = 8,
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20286
// Varies m over [1, 2] with n = 4 and k = 8, one iteration per value.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20302
// Varies n over [1, 4] with m = 2 and k = 8, one iteration per value.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20318
// Full 2x4 tile for every k in [1, 7], i.e. below kr = 8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20333
// Full 2x4 tile for every k in [1, 7] with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20349
// Every (m, n) sub-tile with m in [1, 2] and n in [1, 4] for each k in [1, 7],
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20369
// Full 2x4 tile for every k in [9, 15], i.e. between kr and 2 * kr.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20384
// Full 2x4 tile for every k in [9, 15] with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20400
// Every (m, n) sub-tile with m in [1, 2] and n in [1, 4] for each k in [9, 15],
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20420
// Full 2x4 tile for k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20435
// Full 2x4 tile for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20451
// Every (m, n) sub-tile with m in [1, 2] and n in [1, 4] for k = 16, 24, ..., 80,
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20471
// n in [5, 7] (above nr = 4) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20488
// n in [5, 7] with m = 2 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20506
// n in [5, 7] with m = 2 and a_stride set to 43, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20524
// n in [5, 7] crossed with m in [1, 2], for k = 1, 10, 19, 28, 37,
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20544
// n = 8 and 12 (multiples of nr = 4) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20561
// n = 8 and 12 with m = 2 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20579
// n = 8 and 12 with m = 2 and a_stride set to 43, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20597
// n = 8 and 12 crossed with m in [1, 2], for k = 1, 10, 19, 28, 37,
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20617
// Every (m, n) sub-tile with m in [1, 2] and n in [1, 4] and cm_stride set to 7,
// for k = 1, 10, 19, 28, 37, one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20638
// Full 2x4 tile at k = 8 with the tester's qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20652
// Full 2x4 tile at k = 8 with the tester's qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20666
// Full 2x4 tile at k = 8 with cm_stride set to 7 (larger than nr = 4).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20680
// Full 2x4 tile with a_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20696
// Full 2x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20712
// Full 2x4 tile with both a_zero_point and b_zero_point forced to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20729 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20730
20731
20732 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full 3x4 tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20745
// Full 3x4 tile at k = 8 with cn_stride set to 7 (larger than nr = 4).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20759
// Full 3x4 tile at k = 8 with a_stride set to 11 (larger than k).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20773
// Every (m, n) sub-tile with m in [1, 3] and n in [1, 4] at k = 8,
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20791
// Varies m over [1, 3] with n = 4 and k = 8, one iteration per value.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20807
// Varies n over [1, 4] with m = 3 and k = 8, one iteration per value.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20823
// Full 3x4 tile for every k in [1, 7], i.e. below kr = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20838
// Full 3x4 tile for every k in [1, 7] with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20854
// Every (m, n) sub-tile with m in [1, 3] and n in [1, 4] for each k in [1, 7],
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20874
// Full 3x4 tile for every k in [9, 15], i.e. between kr and 2 * kr.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20889
// Full 3x4 tile for every k in [9, 15] with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20905
// Every (m, n) sub-tile with m in [1, 3] and n in [1, 4] for each k in [9, 15],
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20925
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20940
// Full 3x4 tile for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20956
// Every (m, n) sub-tile with m in [1, 3] and n in [1, 4] for k = 16, 24, ..., 80,
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20976
// n in [5, 7] (above nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20993
// n in [5, 7] with m = 3 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21011
// n in [5, 7] with m = 3 and a_stride set to 43, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21029
// n in [5, 7] crossed with m in [1, 3], for k = 1, 10, 19, 28, 37,
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21049
// n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21066
// n = 8 and 12 with m = 3 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21084
// n = 8 and 12 with m = 3 and a_stride set to 43, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21102
// n = 8 and 12 crossed with m in [1, 3], for k = 1, 10, 19, 28, 37,
// one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21122
// Every (m, n) sub-tile with m in [1, 3] and n in [1, 4] and cm_stride set to 7,
// for k = 1, 10, 19, 28, 37, one iteration per combination.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21143
// Full 3x4 tile at k = 8 with the tester's qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21157
// Full 3x4 tile at k = 8 with the tester's qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21171
// Full 3x4 tile at k = 8 with cm_stride set to 7 (larger than nr = 4).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21185
// Full 3x4 tile with a_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21201
// Full 3x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21217
// Full 3x4 tile with both a_zero_point and b_zero_point forced to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21234 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21235
21236
21237 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full 3x4 tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21250
// Single run with m=3, n=4, k=8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21264
// Single run with m=3, n=4, k=8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21278
// Runs every (n, m) pair with n in [1, 4] and m in [1, 3] at k=8,
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21296
// Runs m in [1, 3] with n=4 and k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21312
// Runs n in [1, 4] with m=3 and k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21328
// Runs k in [1, 7] with m=3 and n=4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21343
// Runs k in [1, 7] with m=3, n=4 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21359
// Runs every (k, n, m) combination with k in [1, 7], n in [1, 4] and
// m in [1, 3], one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21379
// Runs k in [9, 15] with m=3 and n=4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21394
// Runs k in [9, 15] with m=3, n=4 and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21410
// Runs every (k, n, m) combination with k in [9, 15], n in [1, 4] and
// m in [1, 3], one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21430
// Runs k = 16, 24, ..., 80 (step 8) with m=3 and n=4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21445
// Runs k = 16, 24, ..., 80 (step 8) with m=3, n=4 and a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21461
// Runs every (k, n, m) combination with k = 16, 24, ..., 80, n in [1, 4]
// and m in [1, 3], one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21481
// Runs n in [5, 7] against k in {1, 10, 19, 28, 37} with m=3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21498
// Runs n in [5, 7] against k in {1, 10, 19, 28, 37} with m=3 and
// cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21516
// Runs n in [5, 7] against k in {1, 10, 19, 28, 37} with m=3 and
// a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21534
// Runs every (n, k, m) combination with n in [5, 7],
// k in {1, 10, 19, 28, 37} and m in [1, 3], one iteration each.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21554
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m=3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21571
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m=3 and
// cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21589
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m=3 and
// a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21607
// Runs every (n, k, m) combination with n in {8, 12},
// k in {1, 10, 19, 28, 37} and m in [1, 3], one iteration each.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21627
// Runs every (k, n, m) combination with k in {1, 10, 19, 28, 37},
// n in [1, 4] and m in [1, 3], with cm_stride set to 7 and one
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21648
// Single run with m=3, n=4, k=8 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21662
// Single run with m=3, n=4, k=8 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21676
// Single run with m=3, n=4, k=8 and cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21690
// Runs the kernel for k in {1, 10, 19, 28, 37} with m=3, n=4 and
// a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21706
// Runs the kernel for k in {1, 10, 19, 28, 37} with m=3, n=4 and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21722
// Runs the kernel for k in {1, 10, 19, 28, 37} with m=3, n=4 and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester().mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21739 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21740
21741
21742 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=1, n=4, k=8; no other tester settings are overridden.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21755
// Single run with m=1, n=4, k=8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21769
// Single run with m=1, n=4, k=8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21783
// Runs every (n, m) pair with n in [1, 4] and m in [1, 1] at k=8,
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21801
// Runs m in [1, 1] with n=4 and k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21817
// Runs n in [1, 4] with m=1 and k=8, one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21833
// Runs k in [1, 7] with m=1 and n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21848
// Runs k in [1, 7] with m=1, n=4 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21864
// Runs every (k, n, m) combination with k in [1, 7], n in [1, 4] and
// m in [1, 1], one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21884
// Runs k in [9, 15] with m=1 and n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21899
// Runs k in [9, 15] with m=1, n=4 and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21915
// Runs every (k, n, m) combination with k in [9, 15], n in [1, 4] and
// m in [1, 1], one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21935
// Runs k = 16, 24, ..., 80 (step 8) with m=1 and n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21950
// Runs k = 16, 24, ..., 80 (step 8) with m=1, n=4 and a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21966
// Runs every (k, n, m) combination with k = 16, 24, ..., 80, n in [1, 4]
// and m in [1, 1], one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21986
// Runs n in [5, 7] against k in {1, 10, 19, 28, 37} with m=1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22003
// Runs n in [5, 7] against k in {1, 10, 19, 28, 37} with m=1 and
// cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22021
// Runs n in [5, 7] against k in {1, 10, 19, 28, 37} with m=1 and
// a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22039
// Runs every (n, k, m) combination with n in [5, 7],
// k in {1, 10, 19, 28, 37} and m in [1, 1], one iteration each.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22059
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m=1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22076
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m=1 and
// cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22094
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m=1 and
// a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22112
// Runs every (n, k, m) combination with n in {8, 12},
// k in {1, 10, 19, 28, 37} and m in [1, 1], one iteration each.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22132
// Runs every (k, n, m) combination with k in {1, 10, 19, 28, 37},
// n in [1, 4] and m in [1, 1], with cm_stride set to 7 and one
// iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22153
// Single run with m=1, n=4, k=8 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22167
// Single run with m=1, n=4, k=8 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22181
// Single run with m=1, n=4, k=8 and cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22195
// Runs the kernel for k in {1, 10, 19, 28, 37} with m=1, n=4 and
// a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22211
// Runs the kernel for k in {1, 10, 19, 28, 37} with m=1, n=4 and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22227
// Runs the kernel for k in {1, 10, 19, 28, 37} with m=1, n=4 and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22244 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22245
22246
22247 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=1, n=4, k=8; no other tester settings are overridden.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22260
// Single run with m=1, n=4, k=8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22274
// Single run with m=1, n=4, k=8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22288
// Runs every (n, m) pair with n in [1, 4] and m in [1, 1] at k=8,
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester().mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22306
// Sweeps m over 1..1 with n=4, k=8 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22322
// Sweeps n over 1..4 with m=1, k=8 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22338
// Sweeps k over 1..7 with m=1, n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22353
// Sweeps k over 1..7 with m=1, n=4 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22369
// Sweeps k over 1..7, n over 1..4 and m over 1..1 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22389
// Sweeps k over 9..15 with m=1, n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22404
// Sweeps k over 9..15 with m=1, n=4 and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22420
// Sweeps k over 9..15, n over 1..4 and m over 1..1 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22440
// Sweeps k over 16..80 in steps of 8 with m=1, n=4.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22455
// Sweeps k over 16..80 in steps of 8 with m=1, n=4 and a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22471
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..1 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22491
// Sweeps n over 5..7 and k over 1..40 (step 9) with m=1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22508
// Sweeps n over 5..7 and k over 1..40 (step 9) with m=1 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22526
// Sweeps n over 5..7 and k over 1..40 (step 9) with m=1 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22544
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..1 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22564
// Sweeps n over 8..12 (step 4) and k over 1..40 (step 9) with m=1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22581
// Sweeps n over 8..12 (step 4) and k over 1..40 (step 9) with m=1 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22599
// Sweeps n over 8..12 (step 4) and k over 1..40 (step 9) with m=1 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22617
// Sweeps n over 8..12 (step 4), k over 1..40 (step 9) and m over 1..1 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22637
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..1 with cm_stride set to 7
// and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22658
// m=1, n=4, k=8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22672
// m=1, n=4, k=8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22686
// m=1, n=4, k=8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22700
// Sweeps k over 1..40 (step 9) with m=1, n=4 and a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22716
// Sweeps k over 1..40 (step 9) with m=1, n=4 and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22732
// Sweeps k over 1..40 (step 9) with m=1, n=4 and both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22749 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22750
22751
22752 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 2x4c8 SSE2 kernel once with m=2, n=4, k=8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22765
// m=2, n=4, k=8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22779
// m=2, n=4, k=8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22793
// Sweeps n over 1..4 and m over 1..2 with k fixed at 8 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22811
// Sweeps m over 1..2 with n=4, k=8 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22827
// Sweeps n over 1..4 with m=2, k=8 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22843
// Sweeps k over 1..7 with m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22858
// Sweeps k over 1..7 with m=2, n=4 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22874
// Sweeps k over 1..7, n over 1..4 and m over 1..2 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22894
// Sweeps k over 9..15 with m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22909
// Sweeps k over 9..15 with m=2, n=4 and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22925
// Sweeps k over 9..15, n over 1..4 and m over 1..2 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22945
// Sweeps k over 16..80 in steps of 8 with m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22960
// Sweeps k over 16..80 in steps of 8 with m=2, n=4 and a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22976
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..2 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22996
// Sweeps n over 5..7 and k over 1..40 (step 9) with m=2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23013
// Sweeps n over 5..7 and k over 1..40 (step 9) with m=2 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23031
// Sweeps n over 5..7 and k over 1..40 (step 9) with m=2 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23049
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..2 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23069
// Sweeps n over 8..12 (step 4) and k over 1..40 (step 9) with m=2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23086
// Sweeps n over 8..12 (step 4) and k over 1..40 (step 9) with m=2 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23104
// Sweeps n over 8..12 (step 4) and k over 1..40 (step 9) with m=2 and a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23122
// Sweeps n over 8..12 (step 4), k over 1..40 (step 9) and m over 1..2 with iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23142
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..2 with cm_stride set to 7
// and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23163
// m=2, n=4, k=8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23177
// m=2, n=4, k=8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23191
// m=2, n=4, k=8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23205
// Sweeps k over 1..40 (step 9) with m=2, n=4 and a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23221
// Sweeps k over 1..40 (step 9) with m=2, n=4 and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23237
// Sweeps k over 1..40 (step 9) with m=2, n=4 and both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23254 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23255
23256
23257 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 2x4c8 SSE4.1 kernel once with m=2, n=4, k=8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23270
// m=2, n=4, k=8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23284
// m=2, n=4, k=8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23298
// Sweeps n over 1..4 and m over 1..2 with k fixed at 8 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23316
// Sweeps m over 1..2 with n=4, k=8 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23332
// Sweeps n over 1..4 with m=2, k=8 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23348
// Sweeps k over 1..7 with m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23363
// Full tile (m=2, n=4) with k swept over [1, 7] and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23379
// k in [1, 7] crossed with n in [1, 4] and m in [1, 2]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23399
// Full tile (m=2, n=4) with k swept over [9, 15].
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23414
// Full tile (m=2, n=4) with k swept over [9, 15] and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23430
// k in [9, 15] crossed with n in [1, 4] and m in [1, 2]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23450
// Full tile (m=2, n=4) with k stepping through 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23465
// Full tile (m=2, n=4) with k stepping through 16, 24, ..., 80 and a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23481
// k in {16, 24, ..., 80} crossed with n in [1, 4] and m in [1, 2]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23501
// m=2 with n in [5, 7], each crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23518
// m=2 with n in [5, 7] and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23536
// m=2 with n in [5, 7] and k in {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23554
// n in [5, 7] crossed with k in {1, 10, 19, 28, 37} and m in [1, 2]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23574
// m=2 with n in {8, 12}, each crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23591
// m=2 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23609
// m=2 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23627
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in [1, 2]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23647
// k in {1, 10, 19, 28, 37} crossed with n in [1, 4] and m in [1, 2],
// with cm_stride set to 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23668
// Full tile (m=2, n=4) at k=8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23682
// Full tile (m=2, n=4) at k=8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23696
// Full tile (m=2, n=4) at k=8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23710
// Full tile (m=2, n=4) with k in {1, 10, 19, 28, 37} and a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23726
// Full tile (m=2, n=4) with k in {1, 10, 19, 28, 37} and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23742
// Full tile (m=2, n=4) with k in {1, 10, 19, 28, 37} and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23759 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23760
23761
23762 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full tile (m=1, n=4) at k=8.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23775
// Full tile (m=1, n=4) at k=8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23789
// Full tile (m=1, n=4) at k=8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23803
// k=8 for every n in [1, 4]; the m loop covers only m=1. iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23821
// k=8 and n=4; the m loop covers only m=1. iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23837
// k=8 and m=1 while n sweeps [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23853
// Full tile (m=1, n=4) with k swept over [1, 7].
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23868
// Full tile (m=1, n=4) with k swept over [1, 7] and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23884
// k in [1, 7] crossed with n in [1, 4]; the m loop covers only m=1. iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23904
// Full tile (m=1, n=4) with k swept over [9, 15].
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23919
// Full tile (m=1, n=4) with k swept over [9, 15] and a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23935
// k in [9, 15] crossed with n in [1, 4]; the m loop covers only m=1. iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23955
// Full tile (m=1, n=4) with k stepping through 16, 24, ..., 80.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23970
// Full tile (m=1, n=4) with k stepping through 16, 24, ..., 80 and a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23986
// k in {16, 24, ..., 80} crossed with n in [1, 4]; the m loop covers only m=1.
// iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
24006
// m=1 with n in [5, 7], each crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
24023
// m=1 with n in [5, 7] and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
24041
// m=1 with n in [5, 7] and k in {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
24059
// n in [5, 7] crossed with k in {1, 10, 19, 28, 37}; the m loop covers only m=1.
// iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
24079
// m=1 with n in {8, 12}, each crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
24096
// m=1 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
24114
// m=1 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
24132
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; the m loop covers only m=1.
// iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
24152
// k in {1, 10, 19, 28, 37} crossed with n in [1, 4]; the m loop covers only m=1.
// cm_stride set to 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
24173
// Full tile (m=1, n=4) at k=8 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
24187
// Full tile (m=1, n=4) at k=8 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
24201
// Full tile (m=1, n=4) at k=8 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
24215
// Full tile (m=1, n=4) with k in {1, 10, 19, 28, 37} and a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
24231
// Full tile (m=1, n=4) with k in {1, 10, 19, 28, 37} and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
24247
// Full tile (m=1, n=4) with k in {1, 10, 19, 28, 37} and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
24264 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24265
24266
24267 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full tile (m=2, n=4) at k=8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
24280
// Full tile (m=2, n=4) at k=8 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
24294
// Full tile (m=2, n=4) at k=8 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
24308
// k=8 for every (n, m) pair with n in [1, 4] and m in [1, 2]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
24326
// k=8 and n=4 while m sweeps [1, 2]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
24342
// k=8 and m=2 while n sweeps [1, 4]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
24358
// Full tile (m=2, n=4) with k swept over [1, 7].
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
24373
// Full tile (m=2, n=4) with k swept over [1, 7] and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
24389
// k in [1, 7] crossed with n in [1, 4] and m in [1, 2]; iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
24409
// Sweeps k over 9..15 (between kr = 8 and 2 * kr) with m = 2 and n = 4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24424
// Sweeps k over 9..15 with m = 2, n = 4 and a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24440
// Sweeps k over 9..15, n over 1..4 and m over 1..2, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24460
// Sweeps k over the multiples of 8 from 16 to 80 with m = 2 and n = 4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24475
// Sweeps k over the multiples of 8 from 16 to 80 with m = 2, n = 4 and a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24491
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..4 and m over 1..2,
// using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24511
// Sweeps n over 5..7 (above nr = 4) and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24528
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24546
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24564
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2,
// using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24584
// Sweeps n over {8, 12} (multiples of nr = 4) and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24601
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24619
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24637
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2,
// using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24657
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with cm_stride(7),
// using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24678
// Single run with m = 2, n = 4, k = 8 and qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24692
// Single run with m = 2, n = 4, k = 8 and qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24706
// Single run with m = 2, n = 4, k = 8 and cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24720
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24736
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24752
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4, a_zero_point(0) and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24769 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24770
24771
24772 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the full 3x4 tile with k = 8 (equal to kr).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24785
// Single run with m = 3, n = 4, k = 8 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24799
// Single run with m = 3, n = 4, k = 8 and a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24813
// Sweeps n over 1..4 and m over 1..3 with k = 8, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24831
// Sweeps m over 1..3 with n = 4 and k = 8, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24847
// Sweeps n over 1..4 with m = 3 and k = 8, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24863
// Sweeps k over 1..7 (all values below kr = 8) with m = 3 and n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24878
// Sweeps k over 1..7 with m = 3, n = 4 and a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24894
// Sweeps k over 1..7, n over 1..4 and m over 1..3, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24914
// Sweeps k over 9..15 (between kr = 8 and 2 * kr) with m = 3 and n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24929
// Sweeps k over 9..15 with m = 3, n = 4 and a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24945
// Sweeps k over 9..15, n over 1..4 and m over 1..3, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24965
// Sweeps k over the multiples of 8 from 16 to 80 with m = 3 and n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24980
// Sweeps k over the multiples of 8 from 16 to 80 with m = 3, n = 4 and a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24996
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..4 and m over 1..3,
// using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25016
// Sweeps n over 5..7 (above nr = 4) and k over {1, 10, 19, 28, 37} with m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n).k(k)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25033
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 3 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25051
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 3 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25069
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3,
// using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25089
// Sweeps n over {8, 12} (multiples of nr = 4) and k over {1, 10, 19, 28, 37} with m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n).k(k)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25106
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 3 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25124
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 3 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25142
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..3,
// using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25162
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3 with cm_stride(7),
// using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25183
// Single run with m = 3, n = 4, k = 8 and qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
25197
// Single run with m = 3, n = 4, k = 8 and qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
25211
// Single run with m = 3, n = 4, k = 8 and cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
25225
// Sweeps k over {1, 10, 19, 28, 37} with m = 3, n = 4 and a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
25241
// Sweeps k over {1, 10, 19, 28, 37} with m = 3, n = 4 and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
25257
// Sweeps k over {1, 10, 19, 28, 37} with m = 3, n = 4, a_zero_point(0) and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
25274 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25275
25276
25277 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the full 2x8 tile with k = 8 (equal to kr).
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
}
25290
// Single run with m = 2, n = 8, k = 8 and cn_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
}
25304
// Single run with m = 2, n = 8, k = 8 and a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
}
25318
// Sweeps n over 1..8 and m over 1..2 with k = 8, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25336
// Sweeps m over 1..2 with n = 8 and k = 8, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(m).n(8).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25352
// Sweeps n over 1..8 with m = 2 and k = 8, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25368
// Sweeps k over 1..7 (all values below kr = 8) with m = 2 and n = 8.
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25383
// Sweeps k over 1..7 with m = 2, n = 8 and a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25399
// Sweeps k over 1..7, n over 1..8 and m over 1..2, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25419
// Sweeps k over 9..15 (between kr = 8 and 2 * kr) with m = 2 and n = 8.
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25434
// Sweeps k over 9..15 with m = 2, n = 8 and a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25450
// Sweeps k over 9..15, n over 1..8 and m over 1..2, using iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25470
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // Full 2x8 output tile with k taking the multiples of 8 from 16 to 80.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25485
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // Full 2x8 output tile, k in multiples of 8 from 16 to 80, a_stride set to 83.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25501
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in multiples of 8 from 16 to 80, crossed with n in 1..8 and m in 1..2;
  // iterations set to 1.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25521
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 2; n in 9..15 crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25538
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 2; n in 9..15 crossed with k in {1, 10, 19, 28, 37};
  // cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25556
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 2; n in 9..15 crossed with k in {1, 10, 19, 28, 37};
  // a_stride set to 43.
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25574
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n in 9..15, k in {1, 10, 19, 28, 37} and m in 1..2; iterations set to 1.
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25594
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 2; n in {16, 24} crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25611
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 2; n in {16, 24} crossed with k in {1, 10, 19, 28, 37};
  // cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25629
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 2; n in {16, 24} crossed with k in {1, 10, 19, 28, 37};
  // a_stride set to 43.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25647
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n in {16, 24}, k in {1, 10, 19, 28, 37} and m in 1..2; iterations set to 1.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25667
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..2, with cm_stride set to 11
  // and iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25688
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Single 2x8 tile at k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25702
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Single 2x8 tile at k = 8 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25716
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  // Single 2x8 tile at k = 8 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25730
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Full 2x8 tile, k in {1, 10, 19, 28, 37}, with a_zero_point set to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25746
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Full 2x8 tile, k in {1, 10, 19, 28, 37}, with b_zero_point set to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25762
TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, no_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Full 2x8 tile, k in {1, 10, 19, 28, 37}, with both a_zero_point and
  // b_zero_point set to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25779 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25780
25781
25782 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // Single full tile: m == mr (3), n == nr (8), k == kr (8).
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25795
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // Single 3x8 tile at k = 8 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25809
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // Single 3x8 tile at k = 8 with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25823
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k fixed at 8; every n in 1..8 crossed with m in 1..3; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25841
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  // n = 8 and k = 8 fixed; m swept over 1..3; iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25857
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  // m = 3 and k = 8 fixed; n swept over 1..8; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25873
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 output tile with k swept over 1..7.
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25888
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 output tile, k swept over 1..7, with a_stride set to 11.
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25904
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // Every shape with k in 1..7, n in 1..8 and m in 1..3; iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25924
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 output tile with k swept over 9..15.
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25939
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 output tile, k swept over 9..15, with a_stride set to 19.
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25955
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // Every shape with k in 9..15, n in 1..8 and m in 1..3; iterations set to 1.
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25975
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 output tile with k taking the multiples of 8 from 16 to 80.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25990
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 output tile, k in multiples of 8 from 16 to 80, a_stride set to 83.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
26006
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in multiples of 8 from 16 to 80, crossed with n in 1..8 and m in 1..3;
  // iterations set to 1.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26026
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 3; n in 9..15 crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26043
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 3; n in 9..15 crossed with k in {1, 10, 19, 28, 37};
  // cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26061
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 3; n in 9..15 crossed with k in {1, 10, 19, 28, 37};
  // a_stride set to 43.
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26079
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n in 9..15, k in {1, 10, 19, 28, 37} and m in 1..3; iterations set to 1.
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26099
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 3; n in {16, 24} crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26116
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 3; n in {16, 24} crossed with k in {1, 10, 19, 28, 37};
  // cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26134
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  // m fixed at 3; n in {16, 24} crossed with k in {1, 10, 19, 28, 37};
  // a_stride set to 43.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26152
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n in {16, 24}, k in {1, 10, 19, 28, 37} and m in 1..3; iterations set to 1.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26172
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..3, with cm_stride set to 11
  // and iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26193
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Single 3x8 tile at k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
26207
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Single 3x8 tile at k = 8 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
26221
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  // Single 3x8 tile at k = 8 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(8).sr(1)
    .m(3).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
26235
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 tile, k in {1, 10, 19, 28, 37}, with a_zero_point set to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
26251
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 tile, k in {1, 10, 19, 28, 37}, with b_zero_point set to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
26267
TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, no_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Full 3x8 tile, k in {1, 10, 19, 28, 37}, with both a_zero_point and
  // b_zero_point set to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
26284 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26285
26286
26287 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single full tile: m == mr (3), n == nr (16), k == kr (8).
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26300
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single 3x16 tile at k = 8 with cn_stride set to 19.
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26314
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single 3x16 tile at k = 8 with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26328
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  // k fixed at 8; every n in 1..16 crossed with m in 1..3; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26346
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  // n = 16 and k = 8 fixed; m swept over 1..3; iterations set to 1.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26362
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  // m = 3 and k = 8 fixed; n swept over 1..16; iterations set to 1.
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26378
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Full 3x16 output tile with k swept over 1..7.
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26393
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Full 3x16 output tile, k swept over 1..7, with a_stride set to 11.
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26409
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every shape with k in 1..7, n in 1..16 and m in 1..3; iterations set to 1.
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26429
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Full 3x16 output tile with k swept over 9..15.
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26444
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Full 3x16 output tile, k swept over 9..15, with a_stride set to 19.
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26460
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every shape with k in 9..15, n in 1..16 and m in 1..3; iterations set to 1.
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26480
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Full 3x16 output tile with k taking the multiples of 8 from 16 to 80.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26495
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Full 3x16 output tile, k in multiples of 8 from 16 to 80, a_stride set to 83.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26511
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  // k in multiples of 8 from 16 to 80, crossed with n in 1..16 and m in 1..3;
  // iterations set to 1.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26531
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // m fixed at 3; n in 17..31 crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26548
// n in 17..31, k in {1, 10, 19, 28, 37}, m = 3, with cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26566
// n in 17..31, k in {1, 10, 19, 28, 37}, m = 3, with a_stride set to 43
// (larger than every k in the sweep).
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26584
// n in 17..31 and k in {1, 10, 19, 28, 37}, additionally sweeping m over
// [1, 3]; each configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26604
// n takes the multiples of 16 in {32, 48} with m = 3; for each n, k takes the
// values {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26621
// n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 3, with cn_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26639
// n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 3, with a_stride set to 43
// (larger than every k in the sweep).
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26657
// n in {32, 48} and k in {1, 10, 19, 28, 37}, additionally sweeping m over
// [1, 3]; each configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26677
// With cm_stride set to 19, sweeps k over {1, 10, 19, 28, 37}, n over [1, 16]
// and m over [1, 3]; each configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26698
// Single 3x16 tile with k = 8 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26712
// Single 3x16 tile with k = 8 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26726
// Single 3x16 tile with k = 8 and cm_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26740
// Full 3x16 tile, k in {1, 10, 19, 28, 37}, with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26756
// Full 3x16 tile, k in {1, 10, 19, 28, 37}, with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26772
// Full 3x16 tile, k in {1, 10, 19, 28, 37}, with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(depth);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26789 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26790
26791
26792 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Exercises the 1x4c2s4 WAsm SIMD (dot16x2, ld128) kernel on a single 1x4
// tile with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
26804
// Single 1x4 tile with k = 8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
26817
// Single 1x4 tile with k = 8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
26830
// k = 8 with every output size n in [1, 4] and m in [1, 1]; each
// configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t cols = 1; cols <= 4; cols++) {
    for (uint32_t rows = 1; rows <= 1; rows++) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26847
// k = 8 and n = 4 with m swept over [1, 1]; uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; rows++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26862
// k = 8 and m = 1 with n swept over [1, 4]; uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; cols++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26877
// Full 1x4 tile for every k in [1, 7] (all values below 8).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t depth = 1; depth < 8; depth++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26891
// Full 1x4 tile for every k in [1, 7], with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t depth = 1; depth < 8; depth++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26906
// For every k in [1, 7], sweeps n over [1, 4] and m over [1, 1]; each
// configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t depth = 1; depth < 8; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26925
// Full 1x4 tile for every k in [9, 15].
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t depth = 9; depth < 16; depth++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26939
// Full 1x4 tile for every k in [9, 15], with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t depth = 9; depth < 16; depth++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26954
// For every k in [9, 15], sweeps n over [1, 4] and m over [1, 1]; each
// configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t depth = 9; depth < 16; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26973
// Full 1x4 tile for each k in {16, 24, ..., 80} (multiples of 8).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26987
// Full 1x4 tile for each k in {16, 24, ..., 80}, with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27002
// For each k in {16, 24, ..., 80}, sweeps n over [1, 4] and m over [1, 1];
// each configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27021
// n ranges over 5..7 (above nr = 4) with m = 1; for each n, k takes the
// values {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27037
// n in 5..7, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27054
// n in 5..7, k in {1, 10, 19, 28, 37}, m = 1, with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27071
// n in 5..7 and k in {1, 10, 19, 28, 37}, additionally sweeping m over
// [1, 1]; each configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27090
// n takes the multiples of 4 in {8, 12} with m = 1; for each n, k takes the
// values {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27106
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27123
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with a_stride set to 43.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27140
// n in {8, 12} and k in {1, 10, 19, 28, 37}, additionally sweeping m over
// [1, 1]; each configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27159
// With cm_stride set to 7, sweeps k over {1, 10, 19, 28, 37}, n over [1, 4]
// and m over [1, 1]; each configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27179
// Single 1x4 tile with k = 8 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27192
// Single 1x4 tile with k = 8 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27205
// Single 1x4 tile with k = 8 and cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27218
// Full 1x4 tile, k in {1, 10, 19, 28, 37}, with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27233
// Full 1x4 tile, k in {1, 10, 19, 28, 37}, with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27248
// Full 1x4 tile, k in {1, 10, 19, 28, 37}, with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27264 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27265
27266
27267 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Exercises the 1x4c8 WAsm SIMD (dot16x2, ld64) kernel on a single 1x4 tile
// with k = 8.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27279
// Single 1x4 tile with k = 8 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27292
// Single 1x4 tile with k = 8 and a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27305
// k = 8 with every output size n in [1, 4] and m in [1, 1]; each
// configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t cols = 1; cols <= 4; cols++) {
    for (uint32_t rows = 1; rows <= 1; rows++) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27322
// k = 8 and n = 4 with m swept over [1, 1]; uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; rows++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27337
// k = 8 and m = 1 with n swept over [1, 4]; uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; cols++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27352
// Full 1x4 tile for every k in [1, 7] (all values below 8).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t depth = 1; depth < 8; depth++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27366
// Full 1x4 tile for every k in [1, 7], with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t depth = 1; depth < 8; depth++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27381
// For every k in [1, 7], sweeps n over [1, 4] and m over [1, 1]; each
// configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t depth = 1; depth < 8; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27400
// Full 1x4 tile for every k in [9, 15].
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t depth = 9; depth < 16; depth++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27414
// Full 1x4 tile for every k in [9, 15], with a_stride set to 19.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t depth = 9; depth < 16; depth++) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.a_stride(19);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27429
// For every k in [9, 15], sweeps n over [1, 4] and m over [1, 1]; each
// configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t depth = 9; depth < 16; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27448
// Full 1x4 tile for each k in {16, 24, ..., 80} (multiples of 8).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27462
// Full 1x4 tile for each k in {16, 24, ..., 80}, with a_stride set to 83.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.a_stride(83);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27477
// For each k in {16, 24, ..., 80}, sweeps n over [1, 4] and m over [1, 1];
// each configuration uses iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27496
// n ranges over 5..7 (above nr = 4) with m = 1; for each n, k takes the
// values {1, 10, 19, 28, 37}.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27512
// n in 5..7, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27529
// Runs n = 5..7 against k = 1, 10, 19, 28, 37 with m=1 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
27546
// For n = 5..7 and k = 1, 10, 19, 28, 37: runs each m produced by the inner
// loop (only 1 here), with iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
27565
// Runs n = 8 and n = 12 against k = 1, 10, 19, 28, 37 with m=1.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
27581
// Runs n = 8 and n = 12 against k = 1, 10, 19, 28, 37 with m=1 and
// cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
27598
// Runs n = 8 and n = 12 against k = 1, 10, 19, 28, 37 with m=1 and
// a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
27615
// For n = 8, 12 and k = 1, 10, 19, 28, 37: runs each m produced by the inner
// loop (only 1 here), with iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
27634
// For k = 1, 10, 19, 28, 37: runs every n in 1..4 (the m loop only yields 1)
// with cm_stride(7) and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
27654
// Single run at m=1, n=4, k=8 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
27667
// Single run at m=1, n=4, k=8 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
27680
// Single run at m=1, n=4, k=8 with cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
27693
// Runs k = 1, 10, 19, 28, 37 at m=1, n=4 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27708
// Runs k = 1, 10, 19, 28, 37 at m=1, n=4 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27723
// Runs k = 1, 10, 19, 28, 37 at m=1, n=4 with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27739 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27740
27741
27742 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 2x4c2 kernel at m=2, n=4, k=8.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
27754
// Single run at m=2, n=4, k=8 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
27767
// Single run at m=2, n=4, k=8 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
27780
// Runs every (n, m) pair with n in 1..4 and m in 1..2 at k=8, with
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
27797
// Runs m = 1..2 at n=4, k=8, with iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27812
// Runs n = 1..4 at m=2, k=8, with iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27827
// Runs k = 1..7 at m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27841
// Runs k = 1..7 at m=2, n=4 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27856
// For k = 1..7: runs every n in 1..4 and m in 1..2, with iterations(1) per
// configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
27875
// Runs k = 9..15 at m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27889
// Runs k = 9..15 at m=2, n=4 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27904
// For k = 9..15: runs every n in 1..4 and m in 1..2, with iterations(1) per
// configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
27923
// Runs k = 16, 24, ..., 80 at m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27937
// Runs k = 16, 24, ..., 80 at m=2, n=4 with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
27952
// For k = 16, 24, ..., 80: runs every n in 1..4 and m in 1..2, with
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
27971
// Runs n = 5..7 against k = 1, 10, 19, 28, 37 with m=2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
27987
// Runs n = 5..7 against k = 1, 10, 19, 28, 37 with m=2 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28004
// Runs n = 5..7 against k = 1, 10, 19, 28, 37 with m=2 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28021
// For n = 5..7 and k = 1, 10, 19, 28, 37: runs m = 1..2, with iterations(1)
// per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
28040
// Runs n = 8 and n = 12 against k = 1, 10, 19, 28, 37 with m=2.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28056
// Runs n = 8 and n = 12 against k = 1, 10, 19, 28, 37 with m=2 and
// cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28073
// Runs n = 8 and n = 12 against k = 1, 10, 19, 28, 37 with m=2 and
// a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28090
// For n = 8, 12 and k = 1, 10, 19, 28, 37: runs m = 1..2, with iterations(1)
// per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
28109
// For k = 1, 10, 19, 28, 37: runs every n in 1..4 and m in 1..2 with
// cm_stride(7) and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
28129
// Single run at m=2, n=4, k=8 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
28142
// Single run at m=2, n=4, k=8 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
28155
// Single run at m=2, n=4, k=8 with cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
28168
// Runs k = 1, 10, 19, 28, 37 at m=2, n=4 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28183
// Runs k = 1, 10, 19, 28, 37 at m=2, n=4 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28198
// Runs k = 1, 10, 19, 28, 37 at m=2, n=4 with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28214 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28215
28216
28217 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 3x4c2s4 kernel at m=3, n=4, k=8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
28229
// Single run at m=3, n=4, k=8 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
28242
// Single run at m=3, n=4, k=8 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
28255
// Runs every (n, m) pair with n in 1..4 and m in 1..3 at k=8, with
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28272
// Runs m = 1..3 at n=4, k=8, with iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28287
// Runs n = 1..4 at m=3, k=8, with iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28302
// Runs k = 1..7 at m=3, n=4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28316
// Runs k = 1..7 at m=3, n=4 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28331
// For k = 1..7: runs every n in 1..4 and m in 1..3, with iterations(1) per
// configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
28350
// Runs k = 9..15 at m=3, n=4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28364
// Runs k = 9..15 at m=3, n=4 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28379
// For k = 9..15: runs every n in 1..4 and m in 1..3, with iterations(1) per
// configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
28398
// Runs k = 16, 24, ..., 80 at m=3, n=4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28412
// Runs k = 16, 24, ..., 80 at m=3, n=4 with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
28427
// For k = 16, 24, ..., 80: runs every n in 1..4 and m in 1..3, with
// iterations(1) per configuration.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
28446
// Runs n = 5..7 against k = 1, 10, 19, 28, 37 with m=3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28462
// Runs n = 5..7 against k = 1, 10, 19, 28, 37 with m=3 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28479
// Runs n = 5..7 against k = 1, 10, 19, 28, 37 with m=3 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
28496
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 for every m in [1, 3], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28515
// Runs n = 8 and n = 12 with k = 1, 10, 19, 28, 37 at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
28531
// Runs n = 8 and n = 12 with k = 1, 10, 19, 28, 37 at m = 3 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
28548
// Runs n = 8 and n = 12 with k = 1, 10, 19, 28, 37 at m = 3 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
28565
// Runs n = 8 and n = 12 with k = 1, 10, 19, 28, 37 for every m in [1, 3], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28584
// Runs k = 1, 10, 19, 28, 37 for every (m, n) with m in [1, 3] and n in [1, 4], with cm_stride(7) and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28604
// Single run at m = 3, n = 4, k = 8 with qmin(128) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
28617
// Single run at m = 3, n = 4, k = 8 with qmax(128) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
28630
// Single run at m = 3, n = 4, k = 8 with cm_stride(7) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
28643
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28658
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28673
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28689 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28690
28691
28692 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 3, n = 4, k = 8.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
28704
// Single run at m = 3, n = 4, k = 8 with cn_stride(7) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
28717
// Single run at m = 3, n = 4, k = 8 with a_stride(11) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
28730
// Runs k = 8 for every (m, n) with m in [1, 3] and n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
28747
// Runs k = 8, n = 4 for every m in [1, 3], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28762
// Runs k = 8, m = 3 for every n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28777
// Runs every k in [1, 7] at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28791
// Runs every k in [1, 7] at m = 3, n = 4 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28806
// Runs every k in [1, 7] for every (m, n) with m in [1, 3] and n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28825
// Runs every k in [9, 15] at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28839
// Runs every k in [9, 15] at m = 3, n = 4 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28854
// Runs every k in [9, 15] for every (m, n) with m in [1, 3] and n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28873
// Runs k = 16, 24, ..., 80 at m = 3, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28887
// Runs k = 16, 24, ..., 80 at m = 3, n = 4 with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
28902
// Runs k = 16, 24, ..., 80 against every (m, n) with m in [1, 3] and n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28921
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
28937
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 at m = 3 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
28954
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 at m = 3 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
28971
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 for every m in [1, 3], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28990
// Runs n = 8 and n = 12 with k = 1, 10, 19, 28, 37 at m = 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29006
// Runs n = 8 and n = 12 with k = 1, 10, 19, 28, 37 at m = 3 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29023
// Runs n = 8 and n = 12 with k = 1, 10, 19, 28, 37 at m = 3 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29040
// Runs n = 8 and n = 12 with k = 1, 10, 19, 28, 37 for every m in [1, 3], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29059
// Runs k = 1, 10, 19, 28, 37 for every (m, n) with m in [1, 3] and n in [1, 4], with cm_stride(7) and iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29079
// Single run at m = 3, n = 4, k = 8 with qmin(128) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29092
// Single run at m = 3, n = 4, k = 8 with qmax(128) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29105
// Single run at m = 3, n = 4, k = 8 with cm_stride(7) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29118
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29133
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29148
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29164 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29165
29166
29167 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 4, k = 8.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29179
// Single run at m = 4, n = 4, k = 8 with cn_stride(7) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29192
// Single run at m = 4, n = 4, k = 8 with a_stride(11) set on the tester.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29205
// Runs k = 8 for every (m, n) with m in [1, 4] and n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29222
// Runs k = 8, n = 4 for every m in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29237
// Runs k = 8, m = 4 for every n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29252
// Runs every k in [1, 7] at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29266
// Runs every k in [1, 7] at m = 4, n = 4 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29281
// Runs every k in [1, 7] for every (m, n) with m in [1, 4] and n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29300
// Runs every k in [9, 15] at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29314
// Runs every k in [9, 15] at m = 4, n = 4 with a_stride(19).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .a_stride(19)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29329
// Runs every k in [9, 15] for every (m, n) with m in [1, 4] and n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29348
// Runs k = 16, 24, ..., 80 at m = 4, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29362
// Runs k = 16, 24, ..., 80 at m = 4, n = 4 with a_stride(83).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .a_stride(83)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29377
// Runs k = 16, 24, ..., 80 against every (m, n) with m in [1, 4] and n in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29396
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 at m = 4.
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29412
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 at m = 4 and cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29429
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 at m = 4 and a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29446
// Runs n = 5, 6, 7 with k = 1, 10, 19, 28, 37 for every m in [1, 4], using iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29465
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n = 8 and 12 (multiples of nr = 4) crossed with k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29481
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; cn_stride is set to 7.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29498
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; a_stride is set to 43.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29515
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1..4; one iteration per combination.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29534
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // k = 1, 10, 19, 28, 37 with every n = 1..4 and m = 1..4; cm_stride is set to 7.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29554
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single 4x4 case with k = 8 and qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
29567
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single 4x4 case with k = 8 and qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
29580
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single 4x4 case with k = 8 and cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
29593
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  // 4x4 case over k = 1, 10, 19, 28, 37 with a_zero_point set to 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .a_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29608
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  // 4x4 case over k = 1, 10, 19, 28, 37 with b_zero_point set to 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .b_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29623
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  // 4x4 case over k = 1, 10, 19, 28, 37 with both a_zero_point and b_zero_point set to 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29639 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29640
29641
29642 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single 4x4 case with k = 8 and no extra tester options.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
29654
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single 4x4 case with k = 8 and cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
29667
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  // Single 4x4 case with k = 8 and a_stride set to 11.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
29680
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k = 8 with every n = 1..4 and m = 1..4; one iteration per combination.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29697
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k = 8, n = 4, with m = 1..4; one iteration per value of m.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29712
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k = 8, m = 4, with n = 1..4; one iteration per value of n.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29727
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // 4x4 case for every k = 1..7.
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29741
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  // 4x4 case for every k = 1..7 with a_stride set to 11.
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29756
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k = 1..7 with every n = 1..4 and m = 1..4; one iteration per combination.
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29775
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // 4x4 case for every k = 9..15.
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29789
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  // 4x4 case for every k = 9..15 with a_stride set to 19.
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .a_stride(19)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29804
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k = 9..15 with every n = 1..4 and m = 1..4; one iteration per combination.
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29823
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // 4x4 case for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29837
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  // 4x4 case for k = 16, 24, ..., 80 with a_stride set to 83.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .a_stride(83)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
29852
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80 with every n = 1..4 and m = 1..4; one iteration per combination.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29871
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n = 5..7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37; m fixed at 4.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29887
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; cn_stride is set to 7.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29904
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; a_stride is set to 43.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29921
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..4; one iteration per combination.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29940
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n = 8 and 12 (multiples of nr = 4) crossed with k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29956
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; cn_stride is set to 7.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29973
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; a_stride is set to 43.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29990
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1..4; one iteration per combination.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30009
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // k = 1, 10, 19, 28, 37 with every n = 1..4 and m = 1..4; cm_stride is set to 7.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30029
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single 4x4 case with k = 8 and qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30042
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single 4x4 case with k = 8 and qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30055
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single 4x4 case with k = 8 and cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30068
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  // 4x4 case over k = 1, 10, 19, 28, 37 with a_zero_point set to 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .a_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30083
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  // 4x4 case over k = 1, 10, 19, 28, 37 with b_zero_point set to 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .b_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30098
TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  // 4x4 case over k = 1, 10, 19, 28, 37 with both a_zero_point and b_zero_point set to 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30114 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30115
30116
30117 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single 4x4 case with k = 8 and no extra tester options.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30129
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single 4x4 case with k = 8 and cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30142
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  // Single 4x4 case with k = 8 and a_stride set to 11.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .a_stride(11)
    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30155
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k = 8 with every n = 1..4 and m = 1..4; one iteration per combination.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30172
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k = 8, n = 4, with m = 1..4; one iteration per value of m.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30187
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k = 8, m = 4, with n = 1..4; one iteration per value of n.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30202
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // 4x4 case for every k = 1..7.
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30216
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  // 4x4 case for every k = 1..7 with a_stride set to 11.
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(depth)
      .a_stride(11)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30231
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k = 1..7 with every n = 1..4 and m = 1..4; one iteration per combination.
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30250
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // 4x4 case for every k = 9..15.
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30264
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  // 4x4 case for every k = 9..15 with a_stride set to 19.
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(depth)
      .a_stride(19)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30279
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k = 9..15 with every n = 1..4 and m = 1..4; one iteration per combination.
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30298
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // 4x4 case for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30312
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  // 4x4 case for k = 16, 24, ..., 80 with a_stride set to 83.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(depth)
      .a_stride(83)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30327
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80 with every n = 1..4 and m = 1..4; one iteration per combination.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30346
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n = 5..7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37; m fixed at 4.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30362
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; cn_stride is set to 7.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30379
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; a_stride is set to 43.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30396
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..4; one iteration per combination.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30415
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n = 8 and 12 (multiples of nr = 4) crossed with k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30431
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 at m = 4 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30448
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 at m = 4 with a_stride(43).
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30465
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..4, with iterations(1)
// for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30484
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with cm_stride(7) and
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30504
// Single configuration: m = 4, n = 4, k = 8 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30517
// Single configuration: m = 4, n = 4, k = 8 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30530
// Single configuration: m = 4, n = 4, k = 8 with cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30543
// Sweeps k = 1, 10, 19, 28, 37 at m = 4, n = 4 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30558
// Sweeps k = 1, 10, 19, 28, 37 at m = 4, n = 4 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30573
// Sweeps k = 1, 10, 19, 28, 37 at m = 4, n = 4 with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30589 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30590
30591
30592 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 1, n = 2, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
30604
// Single configuration: m = 1, n = 2, k = 1 with cn_stride(5).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
30617
// Single configuration: m = 1, n = 2, k = 1 with a_stride(3).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .a_stride(3)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
30630
// Sweeps n = 1..2 and m = 1 (the m loop runs once) at k = 1, with
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30647
// Sweeps m = 1 (the loop runs once) at n = 2, k = 1 with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(m_dim).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
30662
// Sweeps n = 1..2 at m = 1, k = 1 with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
30677
// Sweeps k = 2..9 at m = 1, n = 2.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
30691
// Sweeps k = 2..9 at m = 1, n = 2 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
30706
// Sweeps k = 2..9, n = 1..2 and m = 1 (the m loop runs once), with
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30725
// Sweeps n = 3 (the n loop runs once) and k = 1, 3, 5 at m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30741
// Sweeps n = 3 (the n loop runs once) and k = 1, 3, 5 at m = 1 with
// cn_stride(5).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30758
// Sweeps n = 3 (the n loop runs once) and k = 1, 3, 5 at m = 1 with
// a_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30775
// Sweeps n = 3, k = 1, 3, 5 and m = 1 (the n and m loops each run once), with
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30794
// Sweeps n = 4, 6 and k = 1, 3, 5 at m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30810
// Sweeps n = 4, 6 and k = 1, 3, 5 at m = 1 with cn_stride(5).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30827
// Sweeps n = 4, 6 and k = 1, 3, 5 at m = 1 with a_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30844
// Sweeps n = 4, 6, k = 1, 3, 5 and m = 1 (the m loop runs once), with
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30863
// Sweeps k = 1, 3, 5, n = 1..2 and m = 1 (the m loop runs once) with
// cm_stride(5) and iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30883
// Single configuration: m = 1, n = 2, k = 1 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
30896
// Single configuration: m = 1, n = 2, k = 1 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
30909
// Single configuration: m = 1, n = 2, k = 1 with cm_stride(5).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
30922
// Sweeps k = 1, 3, 5 at m = 1, n = 2 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
30937
// Sweeps k = 1, 3, 5 at m = 1, n = 2 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
30952
// Sweeps k = 1, 3, 5 at m = 1, n = 2 with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_zero_point) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
30968 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30969
30970
30971 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 1, n = 4, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
30983
// Single configuration: m = 1, n = 4, k = 1 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
30996
// Single configuration: m = 1, n = 4, k = 1 with a_stride(3).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .a_stride(3)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31009
// Sweeps n = 1..4 and m = 1 (the m loop runs once) at k = 1, with
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31026
// Sweeps m = 1 (the loop runs once) at n = 4, k = 1 with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(m_dim).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31041
// Sweeps n = 1..4 at m = 1, k = 1 with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31056
// Sweeps k = 2..9 at m = 1, n = 4.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31070
// Sweeps k = 2..9 at m = 1, n = 4 with a_stride(11).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31085
// Sweeps k = 2..9, n = 1..4 and m = 1 (the m loop runs once), with
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31104
// Sweeps n = 5..7 and k = 1, 3, 5 at m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31120
// Sweeps n = 5..7 and k = 1, 3, 5 at m = 1 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31137
// Sweeps n = 5..7 and k = 1, 3, 5 at m = 1 with a_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31154
// Sweeps n = 5..7, k = 1, 3, 5 and m = 1 (the m loop runs once), with
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31173
// Sweeps n = 8, 12 and k = 1, 3, 5 at m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31189
// Sweeps n = 8, 12 and k = 1, 3, 5 at m = 1 with cn_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31206
// Sweeps n = 8, 12 and k = 1, 3, 5 at m = 1 with a_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31223
// Sweeps n = 8, 12, k = 1, 3, 5 and m = 1 (the m loop runs once), with
// iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31242
// Sweeps k = 1, 3, 5, n = 1..4 and m = 1 (the m loop runs once) with
// cm_stride(7) and iterations(1) for every configuration.
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31262
// Single configuration: m = 1, n = 4, k = 1 with qmin(128).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31275
// Single configuration: m = 1, n = 4, k = 1 with qmax(128).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31288
// Single configuration: m = 1, n = 4, k = 1 with cm_stride(7).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31301
// Sweeps k = 1, 3, 5 at m = 1, n = 4 with a_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31316
// Sweeps k = 1, 3, 5 at m = 1, n = 4 with b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31331
// Sweeps k = 1, 3, 5 at m = 1, n = 4 with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_zero_point) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31347 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31348
31349
31350 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 2, n = 2, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31362
// Single configuration: m = 2, n = 2, k = 1 with cn_stride(5).
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31375
// Single configuration: m = 2, n = 2, k = 1 with a_stride(3).
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .a_stride(3)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31388
// Sweeps n = 1..2 and m = 1..2 at k = 1, with iterations(1) for every
// configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31405
// Sweeps m = 1..2 at n = 2, k = 1 with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(m_dim).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31420
// Sweeps n = 1..2 at m = 2, k = 1 with iterations(1).
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31435
// Runs the 2x2 wasm_fmagic kernel at m = 2, n = 2 for every k in [2, 9].
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31449
// Runs the 2x2 wasm_fmagic kernel at m = 2, n = 2 for every k in [2, 9],
// with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_size)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31464
// Runs the 2x2 wasm_fmagic kernel for every k in [2, 9], n in [1, 2] and m in [1, 2],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31483
// Runs the 2x2 wasm_fmagic kernel at m = 2, n = 3 for k in {1, 3, 5}.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31499
// Runs the 2x2 wasm_fmagic kernel at m = 2, n = 3 for k in {1, 3, 5},
// with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31516
// Runs the 2x2 wasm_fmagic kernel at m = 2, n = 3 for k in {1, 3, 5},
// with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31533
// Runs the 2x2 wasm_fmagic kernel at n = 3 for k in {1, 3, 5} and m in [1, 2],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31552
// Runs the 2x2 wasm_fmagic kernel at m = 2 for n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31568
// Runs the 2x2 wasm_fmagic kernel at m = 2 for n in {4, 6} and k in {1, 3, 5},
// with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31585
// Runs the 2x2 wasm_fmagic kernel at m = 2 for n in {4, 6} and k in {1, 3, 5},
// with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_a) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31602
// Runs the 2x2 wasm_fmagic kernel for n in {4, 6}, k in {1, 3, 5} and m in [1, 2],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31621
// Runs the 2x2 wasm_fmagic kernel for k in {1, 3, 5}, n in [1, 2] and m in [1, 2],
// with cm_stride set to 5 and iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31641
// Runs the 2x2 wasm_fmagic kernel with m = 2, n = 2, k = 1 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31654
// Runs the 2x2 wasm_fmagic kernel with m = 2, n = 2, k = 1 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31667
// Runs the 2x2 wasm_fmagic kernel with m = 2, n = 2, k = 1 and cm_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31680
// Runs the 2x2 wasm_fmagic kernel at m = 2, n = 2 for k in {1, 3, 5},
// with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31695
// Runs the 2x2 wasm_fmagic kernel at m = 2, n = 2 for k in {1, 3, 5},
// with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31710
// Runs the 2x2 wasm_fmagic kernel at m = 2, n = 2 for k in {1, 3, 5},
// with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31726 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31727
31728
31729 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 2x4 wasm_fmagic kernel with m = 2, n = 4, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31741
// Runs the 2x4 wasm_fmagic kernel with m = 2, n = 4, k = 1 and cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31754
// Runs the 2x4 wasm_fmagic kernel with m = 2, n = 4, k = 1 and a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .a_stride(3)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
31767
// Runs the 2x4 wasm_fmagic kernel at k = 1 for every n in [1, 4] and m in [1, 2],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31784
// Runs the 2x4 wasm_fmagic kernel at n = 4, k = 1 for every m in [1, 2],
// with iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_size).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31799
// Runs the 2x4 wasm_fmagic kernel at m = 2, k = 1 for every n in [1, 4],
// with iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31814
// Runs the 2x4 wasm_fmagic kernel at m = 2, n = 4 for every k in [2, 9].
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31828
// Runs the 2x4 wasm_fmagic kernel at m = 2, n = 4 for every k in [2, 9],
// with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
31843
// Runs the 2x4 wasm_fmagic kernel for every k in [2, 9], n in [1, 4] and m in [1, 2],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31862
// Runs the 2x4 wasm_fmagic kernel at m = 2 for every n in [5, 7] and k in {1, 3, 5}.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31878
// Runs the 2x4 wasm_fmagic kernel at m = 2 for every n in [5, 7] and k in {1, 3, 5},
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31895
// Runs the 2x4 wasm_fmagic kernel at m = 2 for every n in [5, 7] and k in {1, 3, 5},
// with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31912
// Runs the 2x4 wasm_fmagic kernel for every n in [5, 7], k in {1, 3, 5} and m in [1, 2],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31931
// Runs the 2x4 wasm_fmagic kernel at m = 2 for n in {8, 12} and k in {1, 3, 5}.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31947
// Runs the 2x4 wasm_fmagic kernel at m = 2 for n in {8, 12} and k in {1, 3, 5},
// with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31964
// Runs the 2x4 wasm_fmagic kernel at m = 2 for n in {8, 12} and k in {1, 3, 5},
// with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31981
// Runs the 2x4 wasm_fmagic kernel for n in {8, 12}, k in {1, 3, 5} and m in [1, 2],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32000
// Runs the 2x4 wasm_fmagic kernel for k in {1, 3, 5}, n in [1, 4] and m in [1, 2],
// with cm_stride set to 7 and iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32020
// Runs the 2x4 wasm_fmagic kernel with m = 2, n = 4, k = 1 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32033
// Runs the 2x4 wasm_fmagic kernel with m = 2, n = 4, k = 1 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32046
// Runs the 2x4 wasm_fmagic kernel with m = 2, n = 4, k = 1 and cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32059
// Runs the 2x4 wasm_fmagic kernel at m = 2, n = 4 for k in {1, 3, 5},
// with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32074
// Runs the 2x4 wasm_fmagic kernel at m = 2, n = 4 for k in {1, 3, 5},
// with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32089
// Runs the 2x4 wasm_fmagic kernel at m = 2, n = 4 for k in {1, 3, 5},
// with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32105 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32106
32107
32108 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32120
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1 and cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32133
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1 and a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .a_stride(3)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32146
// Runs the 4x2 wasm_fmagic kernel at k = 1 for every n in [1, 2] and m in [1, 4],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32163
// Runs the 4x2 wasm_fmagic kernel at n = 2, k = 1 for every m in [1, 4],
// with iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32178
// Runs the 4x2 wasm_fmagic kernel at m = 4, k = 1 for every n in [1, 2],
// with iterations(1) per case.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32193
// Runs the 4x2 wasm_fmagic kernel at m = 4, n = 2 for every k in [2, 9].
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32207
// Runs the 4x2 wasm_fmagic kernel at m = 4, n = 2 for every k in [2, 9],
// with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .a_stride(11)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32222
// Runs the 4x2 wasm_fmagic kernel for every k in [2, 9], n in [1, 2] and m in [1, 4],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32241
// Runs the 4x2 wasm_fmagic kernel at m = 4, n = 3 for k in {1, 3, 5}.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32257
// Runs the 4x2 wasm_fmagic kernel at m = 4, n = 3 for k in {1, 3, 5},
// with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32274
// Runs the 4x2 wasm_fmagic kernel at m = 4, n = 3 for k in {1, 3, 5},
// with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32291
// Runs the 4x2 wasm_fmagic kernel at n = 3 for k in {1, 3, 5} and m in [1, 4],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32310
// Runs the 4x2 wasm_fmagic kernel at m = 4 for n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32326
// Runs the 4x2 wasm_fmagic kernel at m = 4 for n in {4, 6} and k in {1, 3, 5},
// with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32343
// Runs the 4x2 wasm_fmagic kernel at m = 4 for n in {4, 6} and k in {1, 3, 5},
// with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_a) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .a_stride(7)
          .Test(
              xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32360
// Runs the 4x2 wasm_fmagic kernel for n in {4, 6}, k in {1, 3, 5} and m in [1, 4],
// with iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32379
// Runs the 4x2 wasm_fmagic kernel for k in {1, 3, 5}, n in [1, 2] and m in [1, 4],
// with cm_stride set to 5 and iterations(1) per combination.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32399
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1 and qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32412
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1 and qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32425
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1 and cm_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32438
// Runs the 4x2 wasm_fmagic kernel at m = 4, n = 2 for k in {1, 3, 5},
// with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32453
// Runs the 4x2 wasm_fmagic kernel at m = 4, n = 2 for k in {1, 3, 5},
// with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32468
TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_zero_point) {
  // k takes the values 1, 3, 5; m=4, n=2; a_zero_point(0) and b_zero_point(0).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32484 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32485
32486
32487 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
  // Single case: mr=4, nr=4, kr=1, sr=1; m=4, n=4, k=1.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32499
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
  // Single case: mr=4, nr=4, kr=1, sr=1; m=4, n=4, k=1; cn_stride(7).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32512
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_strided_a) {
  // Single case: mr=4, nr=4, kr=1, sr=1; m=4, n=4, k=1; a_stride(3).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32525
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
  // n runs over 1..4 (outer) and m over 1..4 (inner); k=1; iterations(1).
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32542
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  // m runs over 1..4; n=4, k=1; iterations(1).
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(rows).n(4).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32557
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  // n runs over 1..4; m=4, k=1; iterations(1).
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32572
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
  // k runs over 2..9; m=4, n=4.
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32586
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_strided_a) {
  // k runs over 2..9; m=4, n=4; a_stride(11).
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32601
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
  // Nesting order: k over 2..9, then n over 1..4, then m over 1..4; iterations(1).
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32620
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
  // n runs over 5..7 (outer); k takes 1, 3, 5 (inner); m=4.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32636
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  // n runs over 5..7 (outer); k takes 1, 3, 5 (inner); m=4; cn_stride(7).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32653
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_a) {
  // n runs over 5..7 (outer); k takes 1, 3, 5 (inner); m=4; a_stride(7).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32670
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
  // Nesting order: n over 5..7, then k over 1, 3, 5, then m over 1..4; iterations(1).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32689
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
  // n takes 8 and 12 (outer); k takes 1, 3, 5 (inner); m=4.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32705
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
  // n takes 8 and 12 (outer); k takes 1, 3, 5 (inner); m=4; cn_stride(7).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32722
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_a) {
  // n takes 8 and 12 (outer); k takes 1, 3, 5 (inner); m=4; a_stride(7).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32739
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
  // Nesting order: n over 8, 12, then k over 1, 3, 5, then m over 1..4; iterations(1).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32758
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
  // Nesting order: k over 1, 3, 5, then n over 1..4, then m over 1..4;
  // cm_stride(7); iterations(1).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32778
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
  // Single case: mr=4, nr=4, kr=1, sr=1; m=4, n=4, k=1; qmin(128).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32791
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
  // Single case: mr=4, nr=4, kr=1, sr=1; m=4, n=4, k=1; qmax(128).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32804
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
  // Single case: mr=4, nr=4, kr=1, sr=1; m=4, n=4, k=1; cm_stride(7).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32817
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_a_zero_point) {
  // k takes the values 1, 3, 5; m=4, n=4; a_zero_point(0).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32832
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_b_zero_point) {
  // k takes the values 1, 3, 5; m=4, n=4; b_zero_point(0).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32847
TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_zero_point) {
  // k takes the values 1, 3, 5; m=4, n=4; a_zero_point(0) and b_zero_point(0).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32863 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32864
32865
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1.
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32877
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1; cn_stride(5).
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32890
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1; a_stride(3).
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .a_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
32903
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  // n runs over 1..2 (outer); the m loop covers only m=1; k=1; iterations(1).
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32920
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // The m loop covers only m=1; n=2, k=1; iterations(1).
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(rows).n(2).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32935
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // n runs over 1..2; m=1, k=1; iterations(1).
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32950
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
  // k runs over 2..9; m=1, n=2.
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32964
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
  // k runs over 2..9; m=1, n=2; a_stride(11).
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
32979
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Nesting order: k over 2..9, then n over 1..2, then m (only m=1); iterations(1).
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32998
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
  // The n loop covers only n=3 (outer); k takes 1, 3, 5 (inner); m=1.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33014
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  // The n loop covers only n=3 (outer); k takes 1, 3, 5 (inner); m=1; cn_stride(5).
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(5)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33031
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
  // The n loop covers only n=3 (outer); k takes 1, 3, 5 (inner); m=1; a_stride(7).
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33048
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  // Nesting order: n (only n=3), then k over 1, 3, 5, then m (only m=1); iterations(1).
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33067
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
  // n takes 4 and 6 (outer); k takes 1, 3, 5 (inner); m=1.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33083
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  // n takes 4 and 6 (outer); k takes 1, 3, 5 (inner); m=1; cn_stride(5).
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(5)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33100
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_a) {
  // n takes 4 and 6 (outer); k takes 1, 3, 5 (inner); m=1; a_stride(7).
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33117
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
  // Nesting order: n over 4, 6, then k over 1, 3, 5, then m (only m=1); iterations(1).
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33136
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
  // Nesting order: k over 1, 3, 5, then n over 1..2, then m (only m=1);
  // cm_stride(5); iterations(1).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33156
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1; qmin(128).
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33169
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1; qmax(128).
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33182
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1; cm_stride(5).
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33195
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_a_zero_point) {
  // k takes the values 1, 3, 5; m=1, n=2; a_zero_point(0).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33210
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_b_zero_point) {
  // k takes the values 1, 3, 5; m=1, n=2; b_zero_point(0).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33225
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_zero_point) {
  // k takes the values 1, 3, 5; m=1, n=2; a_zero_point(0) and b_zero_point(0).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33241
33242
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1.
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
33254
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1; cn_stride(5).
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
33267
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
  // Single case: mr=1, nr=2, kr=1, sr=1; m=1, n=2, k=1; a_stride(3).
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .a_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
33280
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
  // n runs over 1..2 (outer); the m loop covers only m=1; k=1; iterations(1).
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33297
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  // The m loop covers only m=1; n=2, k=1; iterations(1).
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(rows).n(2).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
33312
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  // n runs over 1..2; m=1, k=1; iterations(1).
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
33327
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
  // k runs over 2..9; m=1, n=2.
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
33341
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
  // k runs over 2..9; m=1, n=2; a_stride(11).
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
33356
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
  // Nesting order: k over 2..9, then n over 1..2, then m (only m=1); iterations(1).
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33375
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
  // The n loop covers only n=3 (outer); k takes 1, 3, 5 (inner); m=1.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33391
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  // The n loop covers only n=3 (outer); k takes 1, 3, 5 (inner); m=1; cn_stride(5).
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(5)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33408
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
  // The n loop covers only n=3 (outer); k takes 1, 3, 5 (inner); m=1; a_stride(7).
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33425
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
  // Nesting order: n (only n=3), then k over 1, 3, 5, then m (only m=1); iterations(1).
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33444
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
  // n takes 4 and 6 (outer); k takes 1, 3, 5 (inner); m=1.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33460
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  // n takes 4 and 6 (outer); k takes 1, 3, 5 (inner); m=1; cn_stride(5).
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(5)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33477
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
  // n takes 4 and 6 (outer); k takes 1, 3, 5 (inner); m=1; a_stride(7).
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33494
// 1x2 scalar_lrintf ukernel: n in {4, 6}, k in {1, 3, 5}, m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33513
// 1x2 scalar_lrintf ukernel: k in {1, 3, 5}, n in [1, 2], m = 1, cm_stride = 5, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33533
// 1x2 scalar_lrintf ukernel: single shape m = 1, n = 2, k = 1 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
33546
// 1x2 scalar_lrintf ukernel: single shape m = 1, n = 2, k = 1 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
33559
// 1x2 scalar_lrintf ukernel: single shape m = 1, n = 2, k = 1 with cm_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
33572
// 1x2 scalar_lrintf ukernel: m = 1, n = 2, k in {1, 3, 5} with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
33587
// 1x2 scalar_lrintf ukernel: m = 1, n = 2, k in {1, 3, 5} with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
33602
// 1x2 scalar_lrintf ukernel: m = 1, n = 2, k in {1, 3, 5} with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
33618
33619
// 1x4 scalar_fmagic ukernel: single shape m = 1, n = 4, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33631
// 1x4 scalar_fmagic ukernel: single shape m = 1, n = 4, k = 1 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33644
// 1x4 scalar_fmagic ukernel: single shape m = 1, n = 4, k = 1 with a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33657
// 1x4 scalar_fmagic ukernel: k = 1, n in [1, 4], m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33674
// 1x4 scalar_fmagic ukernel: k = 1, n = 4, m = 1 (loop over m covers only 1), one iteration.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(rows).n(4).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33689
// 1x4 scalar_fmagic ukernel: k = 1, m = 1, n in [1, 4], one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33704
// 1x4 scalar_fmagic ukernel: m = 1, n = 4, k in [2, 9].
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33718
// 1x4 scalar_fmagic ukernel: m = 1, n = 4, k in [2, 9] with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33733
// 1x4 scalar_fmagic ukernel: k in [2, 9], n in [1, 4], m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33752
// 1x4 scalar_fmagic ukernel: n in [5, 7], k in {1, 3, 5}, m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33768
// 1x4 scalar_fmagic ukernel: n in [5, 7], k in {1, 3, 5}, m = 1, cn_stride = 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33785
// 1x4 scalar_fmagic ukernel: n in [5, 7], k in {1, 3, 5}, m = 1, a_stride = 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33802
// 1x4 scalar_fmagic ukernel: n in [5, 7], k in {1, 3, 5}, m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33821
// 1x4 scalar_fmagic ukernel: n in {8, 12}, k in {1, 3, 5}, m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33837
// 1x4 scalar_fmagic ukernel: n in {8, 12}, k in {1, 3, 5}, m = 1, cn_stride = 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33854
// 1x4 scalar_fmagic ukernel: n in {8, 12}, k in {1, 3, 5}, m = 1, a_stride = 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33871
// 1x4 scalar_fmagic ukernel: n in {8, 12}, k in {1, 3, 5}, m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33890
// 1x4 scalar_fmagic ukernel: k in {1, 3, 5}, n in [1, 4], m = 1, cm_stride = 7, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33910
// 1x4 scalar_fmagic ukernel: single shape m = 1, n = 4, k = 1 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33923
// 1x4 scalar_fmagic ukernel: single shape m = 1, n = 4, k = 1 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33936
// 1x4 scalar_fmagic ukernel: single shape m = 1, n = 4, k = 1 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
33949
// 1x4 scalar_fmagic ukernel: m = 1, n = 4, k in {1, 3, 5} with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33964
// 1x4 scalar_fmagic ukernel: m = 1, n = 4, k in {1, 3, 5} with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33979
// 1x4 scalar_fmagic ukernel: m = 1, n = 4, k in {1, 3, 5} with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
33995
33996
// 1x4 scalar_lrintf ukernel: single shape m = 1, n = 4, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
34008
// 1x4 scalar_lrintf ukernel: single shape m = 1, n = 4, k = 1 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
34021
// 1x4 scalar_lrintf ukernel: single shape m = 1, n = 4, k = 1 with a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .a_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
34034
// 1x4 scalar_lrintf ukernel: k = 1, n in [1, 4], m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34051
// 1x4 scalar_lrintf ukernel: k = 1, n = 4, m = 1 (loop over m covers only 1), one iteration.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(rows).n(4).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
34066
// 1x4 scalar_lrintf ukernel: k = 1, m = 1, n in [1, 4], one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
34081
// 1x4 scalar_lrintf ukernel: m = 1, n = 4, k in [2, 9].
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
34095
// 1x4 scalar_lrintf ukernel: m = 1, n = 4, k in [2, 9] with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
34110
// 1x4 scalar_lrintf ukernel: k in [2, 9], n in [1, 4], m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
34129
// 1x4 scalar_lrintf ukernel: n in [5, 7], k in {1, 3, 5}, m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34145
// 1x4 scalar_lrintf ukernel: n in [5, 7], k in {1, 3, 5}, m = 1, cn_stride = 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34162
// 1x4 scalar_lrintf ukernel: n in [5, 7], k in {1, 3, 5}, m = 1, a_stride = 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34179
// 1x4 scalar_lrintf ukernel: n in [5, 7], k in {1, 3, 5}, m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
34198
// 1x4 scalar_lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, m = 1.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34214
// 1x4 scalar_lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, m = 1, cn_stride = 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34231
// 1x4 scalar_lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, m = 1, a_stride = 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(7)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34248
// 1x4 scalar_lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, m = 1, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
34267
// 1x4 scalar_lrintf ukernel: k in {1, 3, 5}, n in [1, 4], m = 1, cm_stride = 7, one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
34287
// 1x4 scalar_lrintf ukernel: single shape m = 1, n = 4, k = 1 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
34300
// 1x4 scalar_lrintf ukernel: single shape m = 1, n = 4, k = 1 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
34313
// 1x4 scalar_lrintf ukernel: single shape m = 1, n = 4, k = 1 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
34326
// 1x4 scalar_lrintf ukernel: m = 1, n = 4, k in {1, 3, 5} with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
34341
// 1x4 scalar_lrintf ukernel: m = 1, n = 4, k in {1, 3, 5} with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
34356
// 1x4 scalar_lrintf ukernel: m = 1, n = 4, k in {1, 3, 5} with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
34372
34373
// 2x2 scalar_fmagic ukernel: single shape m = 2, n = 2, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
34385
// 2x2 scalar_fmagic ukernel: single shape m = 2, n = 2, k = 1 with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
34398
// 2x2 scalar_fmagic ukernel: single shape m = 2, n = 2, k = 1 with a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .a_stride(3)
      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
34411
// 2x2 scalar_fmagic ukernel: k = 1, n in [1, 2], m in [1, 2], one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34428
// 2x2 scalar_fmagic ukernel: k = 1, n = 2, m in [1, 2], one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(rows).n(2).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
34443
// 2x2 scalar_fmagic ukernel: k = 1, m = 2, n in [1, 2], one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
34458
// 2x2 scalar_fmagic ukernel: m = 2, n = 2, k in [2, 9].
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
34472
// 2x2 scalar_fmagic ukernel: m = 2, n = 2, k in [2, 9] with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .a_stride(11)
        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
34487
// 2x2 scalar_fmagic ukernel: k in [2, 9], n in [1, 2], m in [1, 2], one iteration per shape.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
34506
// 2x2 scalar_fmagic ukernel: n = 3, k in {1, 3, 5}, m = 2.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
34522
// n = 3 at m = 2, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34539
// n = 3 at m = 2, k in {1, 3, 5}, with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34556
// n = 3, k in {1, 3, 5}, m in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34575
// n in {4, 6} at m = 2, with k taking 1, 3, 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34591
// n in {4, 6} at m = 2, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34608
// n in {4, 6} at m = 2, k in {1, 3, 5}, with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34625
// n in {4, 6}, k in {1, 3, 5}, m in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34644
// k in {1, 3, 5}, n in 1..2, m in 1..2, with cm_stride set to 5;
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim < 3; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34664
// Single case (m = 2, n = 2, k = 1) with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
34677
// Single case (m = 2, n = 2, k = 1) with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
34690
// Single case (m = 2, n = 2, k = 1) with cm_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
34703
// m = 2, n = 2, k in {1, 3, 5}, with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34718
// m = 2, n = 2, k in {1, 3, 5}, with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34733
// m = 2, n = 2, k in {1, 3, 5}, with both a_zero_point and b_zero_point
// set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34749
34750
// Single case for the 2x2 scalar_lrintf kernel: m = 2, n = 2, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
34762
// Single case (m = 2, n = 2, k = 1) with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
34775
// Single case (m = 2, n = 2, k = 1) with a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .a_stride(3)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
34788
// k = 1 with every (n, m) for n in 1..2 and m in 1..2;
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim < 3; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34805
// k = 1, n = 2, with m in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(m_dim).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
34820
// k = 1, m = 2, with n in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim < 3; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(n_dim).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
34835
// m = 2, n = 2 for every k in 2..9.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
34849
// m = 2, n = 2 for every k in 2..9, with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
34864
// For each k in 2..9, tries every (n, m) with n in 1..2 and m in 1..2,
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 3; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34883
// n = 3 (the only value in the range) at m = 2, with k taking 1, 3, 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34899
// n = 3 at m = 2, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34916
// n = 3 at m = 2, k in {1, 3, 5}, with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34933
// n = 3, k in {1, 3, 5}, m in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34952
// n in {4, 6} at m = 2, with k taking 1, 3, 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34968
// n in {4, 6} at m = 2, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34985
// n in {4, 6} at m = 2, k in {1, 3, 5}, with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35002
// n in {4, 6}, k in {1, 3, 5}, m in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim < 7; n_dim += 2) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35021
// k in {1, 3, 5}, n in 1..2, m in 1..2, with cm_stride set to 5;
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim < 3; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35041
// Single case (m = 2, n = 2, k = 1) with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35054
// Single case (m = 2, n = 2, k = 1) with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35067
// Single case (m = 2, n = 2, k = 1) with cm_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35080
// m = 2, n = 2, k in {1, 3, 5}, with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35095
// m = 2, n = 2, k in {1, 3, 5}, with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35110
// m = 2, n = 2, k in {1, 3, 5}, with both a_zero_point and b_zero_point
// set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35126
35127
// Single case for the 2x4 scalar_fmagic kernel: m = 2, n = 4, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35139
// Single case (m = 2, n = 4, k = 1) with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35152
// Single case (m = 2, n = 4, k = 1) with a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .a_stride(3)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35165
// k = 1 with every (n, m) for n in 1..4 and m in 1..2;
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35182
// k = 1, n = 4, with m in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(m_dim).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35197
// k = 1, m = 2, with n in 1..4; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(n_dim).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35212
// m = 2, n = 4 for every k in 2..9.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_dim)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35226
// m = 2, n = 4 for every k in 2..9, with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35241
// For each k in 2..9, tries every (n, m) with n in 1..4 and m in 1..2,
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35260
// n in 5..7 at m = 2, with k taking 1, 3, 5.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35276
// n in 5..7 at m = 2, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35293
// n in 5..7 at m = 2, k in {1, 3, 5}, with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35310
// n in 5..7, k in {1, 3, 5}, m in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35329
// n in {8, 12} at m = 2, with k taking 1, 3, 5.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35345
// n in {8, 12} at m = 2, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35362
// n in {8, 12} at m = 2, k in {1, 3, 5}, with a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35379
// n in {8, 12}, k in {1, 3, 5}, m in 1..2; one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35398
// k in {1, 3, 5}, n in 1..4, m in 1..2, with cm_stride set to 7;
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35418
// Single case (m = 2, n = 4, k = 1) with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35431
// Single case (m = 2, n = 4, k = 1) with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35444
// Single case (m = 2, n = 4, k = 1) with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35457
// m = 2, n = 4, k in {1, 3, 5}, with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35472
// m = 2, n = 4, k in {1, 3, 5}, with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35487
// m = 2, n = 4, k in {1, 3, 5}, with both a_zero_point and b_zero_point
// set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35503
35504
// Single case for the 2x4 scalar_lrintf kernel: m = 2, n = 4, k = 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35516
// Single case (m = 2, n = 4, k = 1) with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35529
// Single case (m = 2, n = 4, k = 1) with a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .a_stride(3)
    .Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35542
// k = 1 with every (n, m) for n in 1..4 and m in 1..2;
// one iteration per configuration.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35559
// k=1 with n fixed at 4 while m sweeps 1..2, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(rows).n(4).k(1).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35574
// k=1 with m fixed at 2 while n sweeps 1..4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(cols).k(1).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35589
// Sweeps k over 2..9 at m=2, n=4.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35603
// Sweeps k over 2..9 at m=2, n=4 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(depth).a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35618
// Crosses k in 2..9 with every n in 1..4 and m in 1..2, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35637
// n in 5..7 crossed with k in {1, 3, 5}, m fixed at 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35653
// n in 5..7 crossed with k in {1, 3, 5}, m fixed at 2, cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35670
// n in 5..7 crossed with k in {1, 3, 5}, m fixed at 2, a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35687
// n in 5..7, k in {1, 3, 5} and m in 1..2 in all combinations, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35706
// n in {8, 12} crossed with k in {1, 3, 5}, m fixed at 2.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35722
// n in {8, 12} crossed with k in {1, 3, 5}, m fixed at 2, cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35739
// n in {8, 12} crossed with k in {1, 3, 5}, m fixed at 2, a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35756
// n in {8, 12}, k in {1, 3, 5} and m in 1..2 in all combinations, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35775
// k in {1, 3, 5}, n in 1..4 and m in 1..2 in all combinations, with cm_stride 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35795
// Single run at m=2, n=4, k=1 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35808
// Single run at m=2, n=4, k=1 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35821
// Single run at m=2, n=4, k=1 with cm_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35834
// Runs m=2, n=4 for k in {1, 3, 5} with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(depth).a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35849
// Runs m=2, n=4 for k in {1, 3, 5} with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(depth).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35864
// Runs m=2, n=4 for k in {1, 3, 5} with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(depth).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35880
35881
// Single run at m=3, n=2, k=1 with no stride or quantization overrides.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
35893
// Single run at m=3, n=2, k=1 with cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).cn_stride(5);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
35906
// Single run at m=3, n=2, k=1 with a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).a_stride(3);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
35919
// k=1 for every n in 1..2 crossed with every m in 1..3, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(rows).n(cols).k(1).iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35936
// k=1 with n fixed at 2 while m sweeps 1..3, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(rows).n(2).k(1).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35951
// k=1 with m fixed at 3 while n sweeps 1..2, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(cols).k(1).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35966
// Sweeps k over 2..9 at m=3, n=2.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35980
// Sweeps k over 2..9 at m=3, n=2 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(depth).a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35995
// Crosses k in 2..9 with every n in 1..2 and m in 1..3, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36014
// n = 3 (the only value the n loop visits) crossed with k in {1, 3, 5}, m fixed at 3.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36030
// n = 3 (the only value the n loop visits) crossed with k in {1, 3, 5}, m fixed at 3, cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(5);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36047
// n = 3 (the only value the n loop visits) crossed with k in {1, 3, 5}, m fixed at 3, a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36064
// n = 3 (the only value the n loop visits), k in {1, 3, 5} and m in 1..3 in all combinations, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36083
// n in {4, 6} crossed with k in {1, 3, 5}, m fixed at 3.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36099
// n in {4, 6} crossed with k in {1, 3, 5}, m fixed at 3, cn_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(5);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36116
// n in {4, 6} crossed with k in {1, 3, 5}, m fixed at 3, a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_a) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36133
// n in {4, 6}, k in {1, 3, 5} and m in 1..3 in all combinations, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36152
// k in {1, 3, 5}, n in 1..2 and m in 1..3 in all combinations, with cm_stride 5 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(5).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36172
// Single run at m=3, n=2, k=1 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36185
// Single run at m=3, n=2, k=1 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36198
// Single run at m=3, n=2, k=1 with cm_stride set to 5.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).cm_stride(5);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36211
// Runs m=3, n=2 for k in {1, 3, 5} with a_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(depth).a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36226
// Runs m=3, n=2 for k in {1, 3, 5} with b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(depth).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36241
// Runs m=3, n=2 for k in {1, 3, 5} with both a_zero_point and b_zero_point set to 0.
TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(depth).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36257
36258
// Single run at m=3, n=4, k=1 with no stride or quantization overrides.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36270
// Single run at m=3, n=4, k=1 with cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36283
// Single run at m=3, n=4, k=1 with a_stride set to 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).a_stride(3);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36296
// k=1 for every n in 1..4 crossed with every m in 1..3, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(rows).n(cols).k(1).iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36313
// k=1 with n fixed at 4 while m sweeps 1..3, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(rows).n(4).k(1).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36328
// k=1 with m fixed at 3 while n sweeps 1..4, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(cols).k(1).iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36343
// Sweeps k over 2..9 at m=3, n=4.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36357
// Sweeps k over 2..9 at m=3, n=4 with a_stride set to 11.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(depth).a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36372
// Crosses k in 2..9 with every n in 1..4 and m in 1..3, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36391
// n in 5..7 crossed with k in {1, 3, 5}, m fixed at 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36407
// n in 5..7 crossed with k in {1, 3, 5}, m fixed at 3, cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36424
// n in 5..7 crossed with k in {1, 3, 5}, m fixed at 3, a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36441
// n in 5..7, k in {1, 3, 5} and m in 1..3 in all combinations, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36460
// n in {8, 12} crossed with k in {1, 3, 5}, m fixed at 3.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36476
// n in {8, 12} crossed with k in {1, 3, 5}, m fixed at 3, cn_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36493
// n in {8, 12} crossed with k in {1, 3, 5}, m fixed at 3, a_stride set to 7.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36510
// n in {8, 12}, k in {1, 3, 5} and m in 1..3 in all combinations, iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36529
// k in {1, 3, 5}, n in 1..4 and m in 1..3 in all combinations, with cm_stride 7 and iterations set to 1.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36549
// Single run at m=3, n=4, k=1 with qmin set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36562
// Single run at m=3, n=4, k=1 with qmax set to 128.
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36575
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
  // Single m = 3, n = 4, k = 1 problem with cm_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36588
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_a_zero_point) {
  // m = 3, n = 4 with a_zero_point(0), for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36603
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_b_zero_point) {
  // m = 3, n = 4 with b_zero_point(0), for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36618
TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_zero_point) {
  // m = 3, n = 4 with both a_zero_point(0) and b_zero_point(0),
  // for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36634
36635
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
  // Single m = 4, n = 2, k = 1 problem with otherwise default settings.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36647
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
  // Single m = 4, n = 2, k = 1 problem with cn_stride(5).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36660
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
  // Single m = 4, n = 2, k = 1 problem with a_stride(3).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36673
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  // k = 1 for every (m, n) with n in 1..2 and m in 1..4; iterations(1) each.
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36690
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  // k = 1, n = 2, with m varied over 1..4; iterations(1) each.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(m_dim).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36705
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  // k = 1, m = 4, with n varied over 1..2; iterations(1) each.
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(n_dim).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36720
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
  // m = 4, n = 2 for every k in 2..9.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_dim);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36734
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
  // m = 4, n = 2 with a_stride(11), for every k in 2..9.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_dim);
    tester.a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36749
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  // Sweep k over 2..9, n over 1..2 and m over 1..4; iterations(1) each.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36768
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
  // The n loop covers only n = 3 (3 <= n < 4); k takes {1, 3, 5}; m = 4.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36784
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  // The n loop covers only n = 3 (3 <= n < 4); k takes {1, 3, 5}; m = 4;
  // cn_stride(5) is applied.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36801
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
  // The n loop covers only n = 3 (3 <= n < 4); k takes {1, 3, 5}; m = 4;
  // a_stride(7) is applied.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36818
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  // The n loop covers only n = 3 (3 <= n < 4); k takes {1, 3, 5}; m runs
  // over 1..4; iterations(1) each.
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36837
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
  // n takes {4, 6}; k takes {1, 3, 5}; m = 4.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36853
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  // n takes {4, 6}; k takes {1, 3, 5}; m = 4; cn_stride(5) is applied.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36870
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_a) {
  // n takes {4, 6}; k takes {1, 3, 5}; m = 4; a_stride(7) is applied.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
36887
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
  // n takes {4, 6}; k takes {1, 3, 5}; m runs over 1..4; iterations(1) each.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36906
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
  // Sweep k over {1, 3, 5}, n over 1..2 and m over 1..4 with cm_stride(5);
  // iterations(1) each.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
36926
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
  // Single m = 4, n = 2, k = 1 problem with qmin(128).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36939
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
  // Single m = 4, n = 2, k = 1 problem with qmax(128).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36952
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
  // Single m = 4, n = 2, k = 1 problem with cm_stride(5).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
36965
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_a_zero_point) {
  // m = 4, n = 2 with a_zero_point(0), for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_dim);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36980
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_b_zero_point) {
  // m = 4, n = 2 with b_zero_point(0), for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_dim);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
36995
TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_zero_point) {
  // m = 4, n = 2 with both a_zero_point(0) and b_zero_point(0),
  // for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_dim);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37011
37012
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
  // Single m = 4, n = 4, k = 1 problem with otherwise default settings.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
37024
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
  // Single m = 4, n = 4, k = 1 problem with cn_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
37037
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
  // Single m = 4, n = 4, k = 1 problem with a_stride(3).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
37050
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  // k = 1 for every (m, n) with n in 1..4 and m in 1..4; iterations(1) each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37067
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  // k = 1, n = 4, with m varied over 1..4; iterations(1) each.
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(m_dim).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37082
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  // k = 1, m = 4, with n varied over 1..4; iterations(1) each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(n_dim).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37097
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
  // m = 4, n = 4 for every k in 2..9.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37111
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
  // m = 4, n = 4 with a_stride(11), for every k in 2..9.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.a_stride(11);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37126
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  // Sweep k over 2..9, n over 1..4 and m over 1..4; iterations(1) each.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37145
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
  // n runs over 5..7; k takes {1, 3, 5}; m = 4.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37161
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  // n runs over 5..7; k takes {1, 3, 5}; m = 4; cn_stride(7) is applied.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37178
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
  // n runs over 5..7; k takes {1, 3, 5}; m = 4; a_stride(7) is applied.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37195
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  // n runs over 5..7; k takes {1, 3, 5}; m runs over 1..4;
  // iterations(1) each.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37214
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
  // n takes {8, 12}; k takes {1, 3, 5}; m = 4.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37230
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  // n takes {8, 12}; k takes {1, 3, 5}; m = 4; cn_stride(7) is applied.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37247
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_a) {
  // n takes {8, 12}; k takes {1, 3, 5}; m = 4; a_stride(7) is applied.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.a_stride(7);
      tester.Test(
          xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37264
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
  // n takes {8, 12}; k takes {1, 3, 5}; m runs over 1..4;
  // iterations(1) each.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37283
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
  // Sweep k over {1, 3, 5}, n over 1..4 and m over 1..4 with cm_stride(7);
  // iterations(1) each.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37303
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
  // Single m = 4, n = 4, k = 1 problem with qmin(128).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
37316
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
  // Single m = 4, n = 4, k = 1 problem with qmax(128).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
37329
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
  // Single m = 4, n = 4, k = 1 problem with cm_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
37342
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_a_zero_point) {
  // m = 4, n = 4 with a_zero_point(0), for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37357
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_b_zero_point) {
  // m = 4, n = 4 with b_zero_point(0), for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37372
TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_zero_point) {
  // m = 4, n = 4 with both a_zero_point(0) and b_zero_point(0),
  // for k in {1, 3, 5}.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_dim);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37388