1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qu8-igemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single run with m = mr = 4, n = nr = 16 and k = 16.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
40
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Same shape as k_eq_16 (m = 4, n = 16, k = 16) but with cn_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
54
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k = 16 for every (n, m) with n in [1, 16] and m in [1, 4]; one iteration each.
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
72
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n = 16 and k = 16 fixed; m sweeps [1, 4], one iteration each.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(rows).n(16).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
88
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 and k = 16 fixed; n sweeps [1, 16], one iteration each.
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(cols).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
104
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16; k sweeps [1, 15].
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
119
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k in [1, 15] crossed with n in [1, 16] and m in [1, 4]; one iteration each.
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
139
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16; k sweeps [17, 31].
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
154
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k in [17, 31] crossed with n in [1, 16] and m in [1, 4]; one iteration each.
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
174
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16; k takes the multiples of 16 from 32 through 160.
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
189
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k in {32, 48, ..., 160} crossed with n in [1, 16] and m in [1, 4]; one iteration each.
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
209
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4; n sweeps [17, 31] and k steps 1, 18, ..., 69 (stride 17, up to 80).
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
226
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // As n_gt_16 (n in [17, 31], k stepping by 17 up to 80) with cn_stride set to 19.
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
244
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in [17, 31], k stepping by 17 up to 80, m in [1, 4]; one iteration each.
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
264
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4; n takes 32 and 48, k steps by 17 from 1 up to 80.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
281
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // As n_div_16 (n in {32, 48}, k stepping by 17 up to 80) with cn_stride set to 19.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
299
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in {32, 48}, k stepping by 17 up to 80, m in [1, 4]; one iteration each.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
319
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, ks = 3; k steps by 17 from 1 up to 80.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
335
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3; k stepping by 17 up to 80, n in [1, 16], m in [1, 4]; one iteration each.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
356
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, ks = 3; n sweeps [17, 31], k steps by 17 from 1 up to 80.
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
374
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, ks = 3; n takes 32 and 48, k steps by 17 from 1 up to 80.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
392
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // cm_stride = 19; k stepping by 17 up to 80, n in [1, 16], m in [1, 4]; one iteration each.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
413
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, ks = 3, a_offset = 331; k steps by 17 from 1 up to 80.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.ks(3);
    tester.a_offset(331);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
430
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // As a_offset (ks = 3, a_offset = 331), additionally sweeping zero_index over [0, 3].
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(16).k(depth);
      tester.ks(3);
      tester.a_offset(331);
      tester.zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
450
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, k = 16 with qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
464
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, k = 16 with qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
478
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, k = 16 with cm_stride set to 19.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
492
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, a_zero_point = 0; k steps by 17 from 1 up to 80.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
508
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, b_zero_point = 0; k steps by 17 from 1 up to 80.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
524
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, both a_zero_point and b_zero_point = 0; k steps by 17 from 1 up to 80.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
541 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
542
543
544 #if XNN_ARCH_ARM
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Single run with m = mr = 1, n = nr = 1 and k = 4.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1);
  tester.m(1).n(1).k(4);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
557
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, k = 4 with cn_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1);
  tester.m(1).n(1).k(4);
  tester.cn_stride(3);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
571
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // k = 4; both loops cover only the value 1 (n in [1, 1], m in [1, 1]); one iteration.
  for (uint32_t cols = 1; cols <= 1; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(rows).n(cols).k(4);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
589
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  // n = 1 and k = 4 fixed; m covers [1, 1], one iteration.
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(rows).n(1).k(4);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
605
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1 and k = 4 fixed; n covers [1, 1], one iteration.
  for (uint32_t cols = 1; cols <= 1; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(cols).k(4);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
621
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1; k sweeps [1, 3].
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
636
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in [1, 3]; n and m loops each cover only 1; one iteration each.
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
656
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1; k sweeps [5, 7].
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
671
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in [5, 7]; n and m loops each cover only 1; one iteration each.
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
691
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1; k takes the multiples of 4 from 8 through 40.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
706
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {8, 12, ..., 40}; n and m loops each cover only 1; one iteration each.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
726
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bounds were `n = 2; n < 2`, an empty range, so this test
  // passed without ever calling the microkernel. With nr = 1 no n lies strictly
  // between nr and 2 * nr; use n in {2, 3} (the values the n_div_1 test in this
  // suite uses) so that n > nr is actually exercised.
  // NOTE(review): this file is marked auto-generated; mirror the bound change
  // in tools/generate-gemm-test.py or it will be lost on regeneration.
  for (uint32_t cols = 2; cols < 4; ++cols) {
    // k steps by 5 from 1 up to 20.
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
743
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bounds were `n = 2; n < 2`, an empty range, so this test
  // passed without ever calling the microkernel. Use n in {2, 3} (the values
  // the n_div_1_strided_cn test in this suite uses with the same cn_stride)
  // so that n > nr is actually exercised.
  // NOTE(review): this file is marked auto-generated; mirror the bound change
  // in tools/generate-gemm-test.py or it will be lost on regeneration.
  for (uint32_t cols = 2; cols < 4; ++cols) {
    // k steps by 5 from 1 up to 20; cn_stride is 3.
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
761
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bounds were `n = 2; n < 2`, an empty range, so this test
  // passed without ever calling the microkernel. Use n in {2, 3} (the values
  // the n_div_1_subtile test in this suite uses) so that n > nr is actually
  // exercised.
  // NOTE(review): this file is marked auto-generated; mirror the bound change
  // in tools/generate-gemm-test.py or it will be lost on regeneration.
  for (uint32_t cols = 2; cols < 4; ++cols) {
    // k steps by 5 from 1 up to 20; the m loop covers only 1; one iteration each.
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
781
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1; n takes 2 and 3, k steps by 5 from 1 up to 20.
  for (uint32_t cols = 2; cols <= 3; cols += 1) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
798
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // As n_div_1 (n in {2, 3}, k stepping by 5 up to 20) with cn_stride set to 3.
  for (uint32_t cols = 2; cols <= 3; cols += 1) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
816
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3}, k stepping by 5 up to 20; the m loop covers only 1; one iteration each.
  for (uint32_t cols = 2; cols <= 3; cols += 1) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
836
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, ks = 3; k steps by 5 from 1 up to 20.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
852
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // ks = 3; k stepping by 5 up to 20; n and m loops each cover only 1; one iteration each.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
873
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bounds were `n = 2; n < 2`, an empty range, so this test
  // passed without ever calling the microkernel. Use n in {2, 3} (the values
  // the n_div_1_small_kernel test in this suite uses with the same ks) so
  // that n > nr is actually exercised.
  // NOTE(review): this file is marked auto-generated; mirror the bound change
  // in tools/generate-gemm-test.py or it will be lost on regeneration.
  for (uint32_t cols = 2; cols < 4; ++cols) {
    // k steps by 5 from 1 up to 20; ks is 3.
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
891
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, ks = 3; n takes 2 and 3, k steps by 5 from 1 up to 20.
  for (uint32_t cols = 2; cols <= 3; cols += 1) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
909
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // cm_stride = 3; k stepping by 5 up to 20; n and m loops each cover only 1; one iteration each.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
930
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, ks = 3, a_offset = 23; k steps by 5 from 1 up to 20.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(depth);
    tester.ks(3);
    tester.a_offset(23);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
947
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  // As a_offset (ks = 3, a_offset = 23), with zero_index looping over [0, 0].
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(1).n(1).k(depth);
      tester.ks(3);
      tester.a_offset(23);
      tester.zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
967
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, k = 4 with qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1);
  tester.m(1).n(1).k(4);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
981
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, k = 4 with qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1);
  tester.m(1).n(1).k(4);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
995
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, k = 4 with cm_stride set to 3.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(1).kr(4).sr(1);
  tester.m(1).n(1).k(4);
  tester.cm_stride(3);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1009
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, a_zero_point = 0; k steps by 5 from 1 up to 20.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1025
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, b_zero_point = 0; k steps by 5 from 1 up to 20.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1041
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  // m = 1, n = 1, both a_zero_point and b_zero_point = 0; k steps by 5 from 1 up to 20.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1058 #endif // XNN_ARCH_ARM
1059
1060
1061 #if XNN_ARCH_ARM
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Single run with m = mr = 2, n = nr = 1 and k = 4.
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(1).kr(4).sr(1);
  tester.m(2).n(1).k(4);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1074
// 2x1c4 armsimd32 kernel on a full 2x1 tile with k == 4 and cn_stride set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cn_stride(3)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
}
1088
// 2x1c4 armsimd32 kernel with k == 4 over every (m, n) with m in [1, 2] and n in [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 1; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1106
// 2x1c4 armsimd32 kernel with k == 4 and n == 1, varying m over [1, 2];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(m).n(1).k(4)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1122
// 2x1c4 armsimd32 kernel with k == 4 and m == 2, varying n over [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 1; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(n).k(4)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1138
// 2x1c4 armsimd32 kernel on a full 2x1 tile with k = 1, 2, 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1153
// 2x1c4 armsimd32 kernel with k = 1, 2, 3 across m in [1, 2] and n in [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1173
// 2x1c4 armsimd32 kernel on a full 2x1 tile with k = 5, 6, 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1188
// 2x1c4 armsimd32 kernel with k = 5, 6, 7 across m in [1, 2] and n in [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1208
// 2x1c4 armsimd32 kernel on a full 2x1 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1223
// 2x1c4 armsimd32 kernel with k = 8, 12, ..., 40 across m in [1, 2] and n in [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1243
// 2x1c4 armsimd32 kernel with m == 2, n taken from [2, 2) and k = 1, 6, 11, 16.
// NOTE(review): the n range starts at 2 and stops before 2, so the loop body
// never executes and this test currently exercises nothing.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1260
// 2x1c4 armsimd32 kernel with m == 2, cn_stride 3, n taken from [2, 2) and k = 1, 6, 11, 16.
// NOTE(review): the n range starts at 2 and stops before 2, so the loop body
// never executes and this test currently exercises nothing.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1278
// 2x1c4 armsimd32 kernel with n taken from [2, 2), k = 1, 6, 11, 16 and m in [1, 2];
// one iteration per configuration.
// NOTE(review): the n range starts at 2 and stops before 2, so the loop body
// never executes and this test currently exercises nothing.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1298
// 2x1c4 armsimd32 kernel with m == 2, n = 2, 3 and k = 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1315
// 2x1c4 armsimd32 kernel with m == 2, cn_stride 3, n = 2, 3 and k = 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1333
// 2x1c4 armsimd32 kernel with n = 2, 3, k = 1, 6, 11, 16 and m in [1, 2];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1353
// 2x1c4 armsimd32 kernel on a full 2x1 tile with ks == 3; k sweeps 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1369
// 2x1c4 armsimd32 kernel with ks == 3, k = 1, 6, 11, 16, m in [1, 2] and n in [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1390
// 2x1c4 armsimd32 kernel with m == 2, ks == 3, n taken from [2, 2) and k = 1, 6, 11, 16.
// NOTE(review): the n range starts at 2 and stops before 2, so the loop body
// never executes and this test currently exercises nothing.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1408
// 2x1c4 armsimd32 kernel with m == 2, ks == 3, n = 2, 3 and k = 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1426
// 2x1c4 armsimd32 kernel with cm_stride 3, k = 1, 6, 11, 16, m in [1, 2] and n in [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1447
// 2x1c4 armsimd32 kernel on a full 2x1 tile with ks == 3 and a_offset 43; k sweeps 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1464
// 2x1c4 armsimd32 kernel on a full 2x1 tile with ks == 3 and a_offset 43,
// trying zero_index 0 and 1 for each k in 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(1).k(k)
          .ks(3)
          .a_offset(43)
          .zero_index(mz)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1484
// 2x1c4 armsimd32 kernel on a full 2x1 tile with k == 4 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
}
1498
// 2x1c4 armsimd32 kernel on a full 2x1 tile with k == 4 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
}
1512
// 2x1c4 armsimd32 kernel on a full 2x1 tile with k == 4 and cm_stride set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cm_stride(3)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
}
1526
// 2x1c4 armsimd32 kernel, full 2x1 tile, a_zero_point forced to 0; k sweeps 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1542
// 2x1c4 armsimd32 kernel, full 2x1 tile, b_zero_point forced to 0; k sweeps 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1558
// 2x1c4 armsimd32 kernel, full 2x1 tile, both a_zero_point and b_zero_point set to 0;
// k sweeps 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
  }
}
1575 #endif // XNN_ARCH_ARM
1576
1577
1578 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with k == 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
1591
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with k == 8 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
1605
// 1x16 neonv8_mlal_lane kernel with k == 8 over every (m, n) with m in [1, 1] and n in [1, 16];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1623
// 1x16 neonv8_mlal_lane kernel with k == 8 and n == 16, varying m over [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1639
// 1x16 neonv8_mlal_lane kernel with k == 8 and m == 1, varying n over [1, 16];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1655
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with k = 1 .. 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1670
// 1x16 neonv8_mlal_lane kernel with k = 1 .. 7 across m in [1, 1] and n in [1, 16];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1690
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with k = 9 .. 15.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1705
// 1x16 neonv8_mlal_lane kernel with k = 9 .. 15 across m in [1, 1] and n in [1, 16];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1725
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1740
// 1x16 neonv8_mlal_lane kernel with k = 16, 24, ..., 80 across m in [1, 1] and n in [1, 16];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1760
// 1x16 neonv8_mlal_lane kernel with m == 1, n = 17 .. 31 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n).k(k)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1777
// 1x16 neonv8_mlal_lane kernel with m == 1, cn_stride 19, n = 17 .. 31 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(19)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1795
// 1x16 neonv8_mlal_lane kernel with n = 17 .. 31, k = 1, 10, 19, 28, 37 and m in [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1815
// 1x16 neonv8_mlal_lane kernel with m == 1, n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n).k(k)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1832
// 1x16 neonv8_mlal_lane kernel with m == 1, cn_stride 19, n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(19)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1850
// 1x16 neonv8_mlal_lane kernel with n = 32, 48, k = 1, 10, 19, 28, 37 and m in [1, 1];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1870
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with ks == 3; k sweeps 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1886
// 1x16 neonv8_mlal_lane kernel with ks == 3, k = 1, 10, 19, 28, 37, m in [1, 1] and n in [1, 16];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1907
// 1x16 neonv8_mlal_lane kernel with m == 1, ks == 3, n = 17 .. 31 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1925
// 1x16 neonv8_mlal_lane kernel with m == 1, ks == 3, n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1943
// 1x16 neonv8_mlal_lane kernel with cm_stride 19, k = 1, 10, 19, 28, 37, m in [1, 1] and
// n in [1, 16]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
1964
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with ks == 3 and a_offset 43;
// k sweeps 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1981
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with ks == 3 and a_offset 43,
// using zero_index 0 (the only value in [0, 1)) for each k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(16).k(k)
          .ks(3)
          .a_offset(43)
          .zero_index(mz)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2001
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with k == 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
2015
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with k == 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
2029
// 1x16 neonv8_mlal_lane kernel on a full 1x16 tile with k == 8 and cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
2043
// 1x16 neonv8_mlal_lane kernel, full 1x16 tile, a_zero_point forced to 0;
// k sweeps 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2059
// 1x16 neonv8_mlal_lane kernel, full 1x16 tile, b_zero_point forced to 0;
// k sweeps 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2075
// 1x16 neonv8_mlal_lane kernel, full 1x16 tile, both a_zero_point and b_zero_point set to 0;
// k sweeps 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2092 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2093
2094
2095 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// 1x16c4 neondot kernel on a full 1x16 tile with k == 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
2108
// 1x16c4 neondot kernel on a full 1x16 tile with k == 8 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
2122
// 1x16c4 neondot kernel with k == 8 over every (m, n) with m in [1, 1] and n in [1, 16];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2140
// k=8 with n fixed at 16 while m sweeps [1, 1] (a single value, since mr is 1),
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2156
// k=8 with m fixed at 1 while n sweeps [1, 16], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2172
// Full 1x16 tile for every k in [1, 7], i.e. all k below 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2187
// Every k in [1, 7] crossed with every tile shape n in [1, 16], m in [1, 1],
// each run with iterations(1). The m loop has a single value because mr is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2207
// Full 1x16 tile for every k in [9, 15], i.e. strictly between 8 and 16.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2222
// Every k in [9, 15] crossed with every tile shape n in [1, 16], m in [1, 1],
// each run with iterations(1). The m loop has a single value because mr is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2242
// Full 1x16 tile for k = 16, 24, ..., 80 (multiples of 8 above 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2257
// k = 16, 24, ..., 80 crossed with every tile shape n in [1, 16], m in [1, 1],
// each run with iterations(1). The m loop has a single value because mr is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2277
// n in [17, 31] (above nr=16, below 2*nr) with m=1 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2294
// n in [17, 31] with m=1, k = 1, 10, 19, 28, 37 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2312
// n in [17, 31] crossed with k = 1, 10, 19, 28, 37 and m in [1, 1], each run
// with iterations(1). The m loop has a single value because mr is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2332
// n = 32 and 48 (multiples of nr=16) with m=1 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2349
// n = 32 and 48 with m=1, k = 1, 10, 19, 28, 37 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2367
// n = 32 and 48 crossed with k = 1, 10, 19, 28, 37 and m in [1, 1], each run
// with iterations(1). The m loop has a single value because mr is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2387
// Full 1x16 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2403
// ks(3) with k = 1, 10, 19, 28, 37 crossed with every tile shape n in [1, 16],
// m in [1, 1], each run with iterations(1). The m loop has a single value
// because mr is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2424
// n in [17, 31] with m=1, k = 1, 10, 19, 28, 37 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2442
// n = 32 and 48 with m=1, k = 1, 10, 19, 28, 37 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2460
// cm_stride(19) with k = 1, 10, 19, 28, 37 crossed with every tile shape
// n in [1, 16], m in [1, 1], each run with iterations(1). The m loop has a
// single value because mr is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2481
// Full 1x16 tile with ks(3) and a_offset(43) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2498
// Full 1x16 tile with ks(3) and a_offset(43), setting zero_index to each mz in
// [0, 1) for k = 1, 10, 19, 28, 37. Only mz=0 is visited because the bound is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2518
// Full 1x16 tile at k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
2532
// Full 1x16 tile at k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
2546
// Full 1x16 tile at k=8 with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
2560
// Full 1x16 tile with a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2576
// Full 1x16 tile with b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2592
// Full 1x16 tile with both a_zero_point(0) and b_zero_point(0), for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2609 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
2610
2611
2612 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case: one full 4x16 output tile (m=4, n=16) with k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2625
// Full 4x16 tile at k=8 with cn_stride(19), a stride value larger than nr=16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2639
// k=8 over every tile shape with n in [1, 16] and m in [1, 4], each run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2657
// k=8 with n fixed at 16 while m sweeps [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2673
// k=8 with m fixed at 4 while n sweeps [1, 16], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2689
// Full 4x16 tile for every k in [1, 7], i.e. all k below 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2704
// Every k in [1, 7] crossed with every tile shape n in [1, 16], m in [1, 4],
// each run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2724
// Full 4x16 tile for every k in [9, 15], i.e. strictly between 8 and 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2739
// Every k in [9, 15] crossed with every tile shape n in [1, 16], m in [1, 4],
// each run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2759
// Full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8 above 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2774
// k = 16, 24, ..., 80 crossed with every tile shape n in [1, 16], m in [1, 4],
// each run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2794
// n in [17, 31] (above nr=16, below 2*nr) with m=4 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2811
// n in [17, 31] with m=4, k = 1, 10, 19, 28, 37 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2829
// n in [17, 31] crossed with k = 1, 10, 19, 28, 37 and m in [1, 4], each run
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2849
// n = 32 and 48 (multiples of nr=16) with m=4 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2866
// n = 32 and 48 with m=4, k = 1, 10, 19, 28, 37 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2884
// n = 32 and 48 crossed with k = 1, 10, 19, 28, 37 and m in [1, 4], each run
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2904
// Full 4x16 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2920
// ks(3) with k = 1, 10, 19, 28, 37 crossed with every tile shape n in [1, 16],
// m in [1, 4], each run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2941
// n in [17, 31] with m=4, k = 1, 10, 19, 28, 37 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2959
// n = 32 and 48 with m=4, k = 1, 10, 19, 28, 37 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2977
// cm_stride(19) with k = 1, 10, 19, 28, 37 crossed with every tile shape
// n in [1, 16], m in [1, 4], each run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2998
// Full 4x16 tile with ks(3) and a_offset(163) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3015
// Full 4x16 tile with ks(3) and a_offset(163), setting zero_index to each mz
// in [0, 4) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3035
// Full 4x16 tile at k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3049
// Full 4x16 tile at k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3063
// Full 4x16 tile at k=8 with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3077
// Full 4x16 tile with a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3093
// Full 4x16 tile with b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3109
// Full 4x16 tile with both a_zero_point(0) and b_zero_point(0), for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3126 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3127
3128
3129 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case: one full 4x16 output tile (m=4, n=16) with k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3142
// Full 4x16 tile at k=8 with cn_stride(19), a stride value larger than nr=16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3156
// k=8 over every tile shape with n in [1, 16] and m in [1, 4], each run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3174
// k=8 with n fixed at 16 while m sweeps [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3190
// k=8 with m fixed at 4 while n sweeps [1, 16], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3206
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k takes each value in [1, 8) with m = 4 and n = 16.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3221
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in [1, 8) crossed with n in 1..16 and m in 1..4, iterations(1).
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3241
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k takes each value in [9, 16) with m = 4 and n = 16.
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3256
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in [9, 16) crossed with n in 1..16 and m in 1..4, iterations(1).
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3276
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k steps through 16, 24, ..., 80 with m = 4 and n = 16.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3291
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 16, 24, ..., 80 crossed with n in 1..16 and m in 1..4, iterations(1).
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3311
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in [17, 32) crossed with k = 1, 10, ..., 37; m = 4.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3328
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in [17, 32) crossed with k = 1, 10, ..., 37; m = 4, cn_stride(19).
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3346
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in [17, 32), k = 1, 10, ..., 37, and m in 1..4, iterations(1).
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3366
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 32 and 48 crossed with k = 1, 10, ..., 37; m = 4.
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3383
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 32 and 48 crossed with k = 1, 10, ..., 37; m = 4, cn_stride(19).
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3401
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 32 and 48, k = 1, 10, ..., 37, and m in 1..4, iterations(1).
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3421
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 10, ..., 37 with m = 4, n = 16 and ks(3).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3437
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 10, ..., 37 crossed with n in 1..16 and m in 1..4; ks(3), iterations(1).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3458
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in [17, 32) crossed with k = 1, 10, ..., 37; m = 4, ks(3).
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3476
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 32 and 48 crossed with k = 1, 10, ..., 37; m = 4, ks(3).
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3494
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 10, ..., 37 crossed with n in 1..16 and m in 1..4; cm_stride(19), iterations(1).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3515
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 10, ..., 37 with m = 4, n = 16, ks(3) and a_offset(163).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3532
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 10, ..., 37 crossed with zero_index(mz) for mz in [0, 4); ks(3), a_offset(163).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(16).k(k)
          .ks(3)
          .a_offset(163)
          .zero_index(mz)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3552
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 4, n = 16, k = 8 with qmin(128).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3566
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 4, n = 16, k = 8 with qmax(128).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3580
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 4, n = 16, k = 8 with cm_stride(19).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3594
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 10, ..., 37 with m = 4, n = 16 and a_zero_point(0).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3610
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 10, ..., 37 with m = 4, n = 16 and b_zero_point(0).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3626
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1, 10, ..., 37 with m = 4, n = 16, a_zero_point(0) and b_zero_point(0).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3643 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3644
3645
3646 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // m == mr (1), n == nr (4), k = 8.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3659
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4, k = 8 with cn_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3673
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every (m, n) with m = 1 and 1 <= n <= 4 at k = 8, iterations(1).
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3691
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // m loop covers only m = 1; n = 4, k = 8, iterations(1).
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3707
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // n varies over 1..4 while m = 1 and k = 8, iterations(1).
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3723
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // k takes each value in [1, 8) with m = 1 and n = 4.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3738
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k in [1, 8) crossed with n in 1..4 and m = 1, iterations(1).
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3758
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // k takes each value in [9, 16) with m = 1 and n = 4.
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3773
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k in [9, 16) crossed with n in 1..4 and m = 1, iterations(1).
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3793
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // k steps through 16, 24, ..., 80 with m = 1 and n = 4.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3808
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 16, 24, ..., 80 crossed with n in 1..4 and m = 1, iterations(1).
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3828
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n in [5, 8) crossed with k = 1, 10, ..., 37; m = 1.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3845
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n in [5, 8) crossed with k = 1, 10, ..., 37; m = 1, cn_stride(7).
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3863
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n in [5, 8), k = 1, 10, ..., 37, and m = 1, iterations(1).
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3883
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, ..., 37; m = 1.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3900
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, ..., 37; m = 1, cn_stride(7).
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3918
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12, k = 1, 10, ..., 37, and m = 1, iterations(1).
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3938
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, ..., 37 with m = 1, n = 4 and ks(3).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3954
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, ..., 37 crossed with n in 1..4 and m = 1; ks(3), iterations(1).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3975
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n in [5, 8) crossed with k = 1, 10, ..., 37; m = 1, ks(3).
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3993
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, ..., 37; m = 1, ks(3).
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4011
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, ..., 37 crossed with n in 1..4 and m = 1; cm_stride(7), iterations(1).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4032
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, ..., 37 with m = 1, n = 4, ks(3) and a_offset(43).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4049
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, ..., 37 crossed with zero_index(mz) for mz in [0, 1); ks(3), a_offset(43).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(k)
          .ks(3)
          .a_offset(43)
          .zero_index(mz)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4069
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4, k = 8 with qmin(128).
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4083
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4, k = 8 with qmax(128).
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4097
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4, k = 8 with cm_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4111
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, ..., 37 with m = 1, n = 4 and a_zero_point(0).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4127
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, ..., 37 with m = 1, n = 4 and b_zero_point(0).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4143
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, ..., 37 with m = 1, n = 4, a_zero_point(0) and b_zero_point(0).
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4160 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4161
4162
4163 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // m == mr (2), n == nr (4), k = 8.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4176
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // m = 2, n = 4, k = 8 with cn_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4190
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every (m, n) with 1 <= m <= 2 and 1 <= n <= 4 at k = 8, iterations(1).
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4208
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // m varies over 1..2 while n = 4 and k = 8, iterations(1).
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4224
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // n varies over 1..4 while m = 2 and k = 8, iterations(1).
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4240
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // k takes each value in [1, 8) with m = 2 and n = 4.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4255
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k in [1, 8) crossed with n in 1..4 and m in 1..2, iterations(1).
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4275
// Runs the 2x4c2 SSE4.1 ld64 kernel on a full 2x4 output for every k in [9, 16).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_cur);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4290
// Runs the 2x4c2 SSE4.1 ld64 kernel for every k in [9, 16) combined with every
// n in 1..4 and m in 1..2, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 2; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4310
// Runs the 2x4c2 SSE4.1 ld64 kernel on a full 2x4 output for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 16; k_cur <= 80; k_cur += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_cur);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4325
// Runs the 2x4c2 SSE4.1 ld64 kernel for k = 16, 24, ..., 80 combined with every
// n in 1..4 and m in 1..2, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 16; k_cur <= 80; k_cur += 8) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 2; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4345
// Runs the 2x4c2 SSE4.1 ld64 kernel with m = 2 for n in [5, 8) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4362
// Runs the 2x4c2 SSE4.1 ld64 kernel with m = 2 and cn_stride = 7 for n in [5, 8)
// and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_cur).k(k_cur).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4380
// Runs the 2x4c2 SSE4.1 ld64 kernel for n in [5, 8), k = 1, 10, 19, 28, 37 and
// m in 1..2, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      for (uint32_t m_cur = 1; m_cur <= 2; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4400
// Runs the 2x4c2 SSE4.1 ld64 kernel with m = 2 for n = 8, 12 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4417
// Runs the 2x4c2 SSE4.1 ld64 kernel with m = 2 and cn_stride = 7 for n = 8, 12
// and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_cur).k(k_cur).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4435
// Runs the 2x4c2 SSE4.1 ld64 kernel for n = 8, 12, k = 1, 10, 19, 28, 37 and
// m in 1..2, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      for (uint32_t m_cur = 1; m_cur <= 2; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4455
// Runs the 2x4c2 SSE4.1 ld64 kernel on a full 2x4 output with ks = 3 for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_cur).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4471
// Runs the 2x4c2 SSE4.1 ld64 kernel with ks = 3 for k = 1, 10, 19, 28, 37,
// n in 1..4 and m in 1..2, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 2; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4492
// Runs the 2x4c2 SSE4.1 ld64 kernel with m = 2 and ks = 3 for n in [5, 8) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_cur).k(k_cur).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4510
// Runs the 2x4c2 SSE4.1 ld64 kernel with m = 2 and ks = 3 for n = 8, 12 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_cur).k(k_cur).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4528
// Runs the 2x4c2 SSE4.1 ld64 kernel with cm_stride = 7 for k = 1, 10, 19, 28, 37,
// n in 1..4 and m in 1..2, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 2; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4549
// Runs the 2x4c2 SSE4.1 ld64 kernel on a full 2x4 output with ks = 3 and
// a_offset = 83 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_cur).ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4566
// Runs the 2x4c2 SSE4.1 ld64 kernel with ks = 3 and a_offset = 83 for
// k = 1, 10, 19, 28, 37, setting zero_index to each value in [0, 2).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(k_cur).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4586
// Runs the 2x4c2 SSE4.1 ld64 kernel once on a 2x4 output with k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4600
// Runs the 2x4c2 SSE4.1 ld64 kernel once on a 2x4 output with k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4614
// Runs the 2x4c2 SSE4.1 ld64 kernel once on a 2x4 output with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4628
// Runs the 2x4c2 SSE4.1 ld64 kernel on a full 2x4 output with a_zero_point = 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_cur).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4644
// Runs the 2x4c2 SSE4.1 ld64 kernel on a full 2x4 output with b_zero_point = 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_cur).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4660
// Runs the 2x4c2 SSE4.1 ld64 kernel on a full 2x4 output with both a_zero_point
// and b_zero_point set to 0 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_cur).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4677 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4678
4679
4680 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c2 SSE2 ld64 kernel once on a full 3x4 output with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4693
// Runs the 3x4c2 SSE2 ld64 kernel once on a full 3x4 output with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4707
// Runs the 3x4c2 SSE2 ld64 kernel with k = 8 for every n in 1..4 and m in 1..3,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
    for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_cur).n(n_cur).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4725
// Runs the 3x4c2 SSE2 ld64 kernel with n = 4 and k = 8 while m sweeps 1..3,
// using a single tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_cur).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4741
// Runs the 3x4c2 SSE2 ld64 kernel with m = 3 and k = 8 while n sweeps 1..4,
// using a single tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_cur).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4757
// Runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 output for every k in [1, 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur < 8; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_cur);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4772
// Runs the 3x4c2 SSE2 ld64 kernel for every k in [1, 8) combined with every
// n in 1..4 and m in 1..3, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur < 8; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4792
// Runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 output for every k in [9, 16).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_cur);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4807
// Runs the 3x4c2 SSE2 ld64 kernel for every k in [9, 16) combined with every
// n in 1..4 and m in 1..3, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4827
// Runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 output for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 16; k_cur <= 80; k_cur += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_cur);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4842
// Runs the 3x4c2 SSE2 ld64 kernel for k = 16, 24, ..., 80 combined with every
// n in 1..4 and m in 1..3, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 16; k_cur <= 80; k_cur += 8) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4862
// Runs the 3x4c2 SSE2 ld64 kernel with m = 3 for n in [5, 8) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4879
// Runs the 3x4c2 SSE2 ld64 kernel with m = 3 and cn_stride = 7 for n in [5, 8)
// and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_cur).k(k_cur).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4897
// Runs the 3x4c2 SSE2 ld64 kernel for n in [5, 8), k = 1, 10, 19, 28, 37 and
// m in 1..3, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4917
// Runs the 3x4c2 SSE2 ld64 kernel with m = 3 for n = 8, 12 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4934
// Runs the 3x4c2 SSE2 ld64 kernel with m = 3 and cn_stride = 7 for n = 8, 12
// and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_cur).k(k_cur).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4952
// Runs the 3x4c2 SSE2 ld64 kernel for n = 8, 12, k = 1, 10, 19, 28, 37 and
// m in 1..3, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4972
// Runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 output with ks = 3 for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_cur).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4988
// Runs the 3x4c2 SSE2 ld64 kernel with ks = 3 for k = 1, 10, 19, 28, 37,
// n in 1..4 and m in 1..3, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5009
// Runs the 3x4c2 SSE2 ld64 kernel with m = 3 and ks = 3 for n in [5, 8) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_cur).k(k_cur).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5027
// Runs the 3x4c2 SSE2 ld64 kernel with m = 3 and ks = 3 for n = 8, 12 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_cur).k(k_cur).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5045
// Runs the 3x4c2 SSE2 ld64 kernel with cm_stride = 7 for k = 1, 10, 19, 28, 37,
// n in 1..4 and m in 1..3, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5066
// Runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 output with ks = 3 and
// a_offset = 127 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_cur).ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5083
// Runs the 3x4c2 SSE2 ld64 kernel with ks = 3 and a_offset = 127 for
// k = 1, 10, 19, 28, 37, setting zero_index to each value in [0, 3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_cur).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5103
// Runs the 3x4c2 SSE2 ld64 kernel once on a 3x4 output with k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5117
// Runs the 3x4c2 SSE2 ld64 kernel once on a 3x4 output with k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5131
// Runs the 3x4c2 SSE2 ld64 kernel once on a 3x4 output with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5145
// Runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 output with a_zero_point = 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_cur).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5161
// Runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 output with b_zero_point = 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_cur).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5177
// Runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 output with both a_zero_point
// and b_zero_point set to 0 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_cur).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5194 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5195
5196
5197 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4c2 SSE4.1 ld64 kernel once on a full 4x4 output with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5210
// Runs the 4x4c2 SSE4.1 ld64 kernel once on a full 4x4 output with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5224
// Runs the 4x4c2 SSE4.1 ld64 kernel with k = 8 for every n in 1..4 and m in 1..4,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
    for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_cur).n(n_cur).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5242
// Runs the 4x4c2 SSE4.1 ld64 kernel with n = 4 and k = 8 while m sweeps 1..4,
// using a single tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_cur).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5258
// Runs the 4x4c2 SSE4.1 ld64 kernel with m = 4 and k = 8 while n sweeps 1..4,
// using a single tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_cur).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5274
// Runs the 4x4c2 SSE4.1 ld64 kernel on a full 4x4 output for every k in [1, 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur < 8; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5289
// Runs the 4x4c2 SSE4.1 ld64 kernel for every k in [1, 8) combined with every
// n in 1..4 and m in 1..4, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 1; k_cur < 8; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5309
// Runs the 4x4c2 SSE4.1 ld64 kernel on a full 4x4 output for every k in [9, 16).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5324
// Runs the 4x4c2 SSE4.1 ld64 kernel for every k in [9, 16) combined with every
// n in 1..4 and m in 1..4, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5344
// Runs the 4x4c2 SSE4.1 ld64 kernel on a full 4x4 output for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_cur = 16; k_cur <= 80; k_cur += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5359
// Sweeps k = 16, 24, ..., 80 against every m in 1..4 and n in 1..4, running
// a single iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5379
// Runs m=4 with n = 5, 6, 7 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5396
// Runs m=4 with n = 5, 6, 7 for each k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5414
// Sweeps n = 5, 6, 7 and k in {1, 10, 19, 28, 37} against every m in 1..4,
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5434
// Runs m=4 with n = 8 and 12 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5451
// Runs m=4 with n = 8 and 12 for each k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5469
// Sweeps n = 8 and 12 and k in {1, 10, 19, 28, 37} against every m in 1..4,
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5489
// Runs the m=4, n=4 case with ks set to 3 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5505
// Sweeps k in {1, 10, 19, 28, 37} against every m in 1..4 and n in 1..4 with
// ks set to 3, running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5526
// Runs m=4 with n = 5, 6, 7 and ks set to 3 for each k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5544
// Runs m=4 with n = 8 and 12 and ks set to 3 for each k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5562
// Sweeps k in {1, 10, 19, 28, 37} against every m in 1..4 and n in 1..4 with
// cm_stride set to 7, running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5583
// Runs the m=4, n=4 case with ks set to 3 and a_offset set to 163 for each
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5600
// Runs the m=4, n=4 case with ks set to 3 and a_offset set to 163, sweeping
// zero_index over 0..3 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5620
// Runs the m=4, n=4, k=8 case with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5634
// Runs the m=4, n=4, k=8 case with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5648
// Runs the m=4, n=4, k=8 case with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5662
// Runs the m=4, n=4 case with a_zero_point set to 0 for each k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5678
// Runs the m=4, n=4 case with b_zero_point set to 0 for each k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5694
// Runs the m=4, n=4 case with both a_zero_point and b_zero_point set to 0
// for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5711 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5712
5713
5714 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the single m=2, n=4, k=8 case with no other tester settings changed.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5727
// Runs the m=2, n=4, k=8 case with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
5741
// Runs k=8 for every m in 1..2 and n in 1..4, a single iteration per
// (n, m) pair.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5759
// Runs n=4, k=8 for m = 1 and 2, a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5775
// Runs m=2, k=8 for n = 1..4, a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5791
// Runs the m=2, n=4 case for each k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5806
// Sweeps k = 1..7 against every m in 1..2 and n in 1..4, running a single
// iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5826
// Runs the m=2, n=4 case for each k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5841
// Sweeps k = 9..15 against every m in 1..2 and n in 1..4, running a single
// iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5861
// Runs the m=2, n=4 case for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
5876
// Sweeps k = 16, 24, ..., 80 against every m in 1..2 and n in 1..4, running
// a single iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5896
// Runs m=2 with n = 5, 6, 7 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5913
// Runs m=2 with n = 5, 6, 7 for each k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5931
// Sweeps n = 5, 6, 7 and k in {1, 10, 19, 28, 37} against m = 1 and 2,
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
5951
// Runs m=2 with n = 8 and 12 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5968
// Runs m=2 with n = 8 and 12 for each k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
5986
// Sweeps n = 8 and 12 and k in {1, 10, 19, 28, 37} against m = 1 and 2,
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6006
// Runs the m=2, n=4 case with ks set to 3 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6022
// Sweeps k in {1, 10, 19, 28, 37} against every m in 1..2 and n in 1..4 with
// ks set to 3, running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6043
// Runs m=2 with n = 5, 6, 7 and ks set to 3 for each k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6061
// Runs m=2 with n = 8 and 12 and ks set to 3 for each k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6079
// Sweeps k in {1, 10, 19, 28, 37} against every m in 1..2 and n in 1..4 with
// cm_stride set to 7, running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6100
// Runs the m=2, n=4 case with ks set to 3 and a_offset set to 83 for each
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6117
// Runs the m=2, n=4 case with ks set to 3 and a_offset set to 83, sweeping
// zero_index over 0..1 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(k)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6137
// Runs the m=2, n=4, k=8 case with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6151
// Runs the m=2, n=4, k=8 case with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6165
// Runs the m=2, n=4, k=8 case with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6179
// Runs the m=2, n=4 case with a_zero_point set to 0 for each k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6195
// Runs the m=2, n=4 case with b_zero_point set to 0 for each k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6211
// Runs the m=2, n=4 case with both a_zero_point and b_zero_point set to 0
// for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6228 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6229
6230
6231 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the single m=2, n=4, k=8 case with no other tester settings changed.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6244
// Runs the m=2, n=4, k=8 case with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6258
// Runs k=8 for every m in 1..2 and n in 1..4, a single iteration per
// (n, m) pair.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6276
// Runs n=4, k=8 for m = 1 and 2, a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6292
// Runs m=2, k=8 for n = 1..4, a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6308
// Runs the m=2, n=4 case for each k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6323
// Sweeps k = 1..7 against every m in 1..2 and n in 1..4, running a single
// iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6343
// Runs the m=2, n=4 case for each k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6358
// Sweeps k = 9..15 against every m in 1..2 and n in 1..4, running a single
// iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6378
// Runs the m=2, n=4 case for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6393
// Sweeps k = 16, 24, ..., 80 against every m in 1..2 and n in 1..4, running
// a single iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6413
// Runs m=2 with n = 5, 6, 7 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6430
// Runs m=2 with n = 5, 6, 7 for each k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6448
// Sweeps n = 5, 6, 7 and k in {1, 10, 19, 28, 37} against m = 1 and 2,
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6468
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 (multiples of nr = 4) with m fixed at 2; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6485
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 with cn_stride(7) and m fixed at 2; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6503
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12, k = 1, 10, 19, 28, 37, and m = 1..2; one iteration per case.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6523
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile with ks(3); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6539
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  // ks(3) with k = 1, 10, 19, 28, 37 and every (m, n) pair for m in 1..2, n in 1..4; one iteration per case.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6560
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n = 5..7 with ks(3) and m fixed at 2; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6578
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 with ks(3) and m fixed at 2; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6596
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // cm_stride(7) with k = 1, 10, 19, 28, 37 and every (m, n) pair for m in 1..2, n in 1..4; one iteration per case.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6617
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile with ks(3) and a_offset(83); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6634
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile with ks(3) and a_offset(83); zero_index takes 0 and 1 for each k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6654
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Single full-tile case (m = 2, n = 4, k = 8) with qmin(128).
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6668
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Single full-tile case (m = 2, n = 4, k = 8) with qmax(128).
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6682
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Single full-tile case (m = 2, n = 4, k = 8) with cm_stride(7).
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6696
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile with a_zero_point(0); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6712
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile with b_zero_point(0); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6728
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile with both a_zero_point(0) and b_zero_point(0); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6745 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6746
6747
6748 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single full-tile case: m = 3, n = 4, k = 8.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6761
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // Single full-tile case (m = 3, n = 4, k = 8) with cn_stride(7).
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
6775
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 8 with every (m, n) pair for n in 1..4 and m in 1..3; one iteration per case.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6793
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  // k = 8 and n = 4 with m = 1..3; one iteration per case.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6809
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  // k = 8 and m = 3 with n = 1..4; one iteration per case.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6825
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with k = 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6840
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 1..7 with every (m, n) pair for n in 1..4 and m in 1..3; one iteration per case.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6860
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with k = 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6875
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 9..15 with every (m, n) pair for n in 1..4 and m in 1..3; one iteration per case.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6895
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with k = 16, 24, ..., 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
6910
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 16, 24, ..., 80 with every (m, n) pair for n in 1..4 and m in 1..3; one iteration per case.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6930
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7 (above nr = 4) with m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6947
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7 with cn_stride(7) and m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
6965
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7, k = 1, 10, 19, 28, 37, and m = 1..3; one iteration per case.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
6985
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12 (multiples of nr = 4) with m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7002
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12 with cn_stride(7) and m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7020
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12, k = 1, 10, 19, 28, 37, and m = 1..3; one iteration per case.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7040
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with ks(3); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7056
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  // ks(3) with k = 1, 10, 19, 28, 37 and every (m, n) pair for n in 1..4, m in 1..3; one iteration per case.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7077
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7 with ks(3) and m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7095
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12 with ks(3) and m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7113
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // cm_stride(7) with k = 1, 10, 19, 28, 37 and every (m, n) pair for n in 1..4, m in 1..3; one iteration per case.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7134
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with ks(3) and a_offset(127); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7151
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with ks(3) and a_offset(127); zero_index takes 0, 1, 2 for each k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7171
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Single full-tile case (m = 3, n = 4, k = 8) with qmin(128).
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7185
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Single full-tile case (m = 3, n = 4, k = 8) with qmax(128).
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7199
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // Single full-tile case (m = 3, n = 4, k = 8) with cm_stride(7).
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7213
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with a_zero_point(0); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7229
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with b_zero_point(0); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7245
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with both a_zero_point(0) and b_zero_point(0); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7262 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7263
7264
7265 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single full-tile case: m = 3, n = 4, k = 8.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7278
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Single full-tile case (m = 3, n = 4, k = 8) with cn_stride(7).
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7292
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 8 with every (m, n) pair for n in 1..4 and m in 1..3; one iteration per case.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7310
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // k = 8 and n = 4 with m = 1..3; one iteration per case.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7326
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // k = 8 and m = 3 with n = 1..4; one iteration per case.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7342
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7357
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 1..7 with every (m, n) pair for n in 1..4 and m in 1..3; one iteration per case.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7377
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7392
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 9..15 with every (m, n) pair for n in 1..4 and m in 1..3; one iteration per case.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7412
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 16, 24, ..., 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7427
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 16, 24, ..., 80 with every (m, n) pair for n in 1..4 and m in 1..3; one iteration per case.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7447
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 5..7 (above nr = 4) with m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7464
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n = 5..7 with cn_stride(7) and m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7482
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 5..7, k = 1, 10, 19, 28, 37, and m = 1..3; one iteration per case.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7502
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 (multiples of nr = 4) with m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7519
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 with cn_stride(7) and m fixed at 3; k = 1, 10, 19, 28, 37.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7537
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12, k = 1, 10, 19, 28, 37, and m = 1..3; one iteration per case.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7557
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with ks(3); k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7573
// k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3; ks = 3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 3; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7594
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}; m = 3, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7612
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 3, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7630
// k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3; cm_stride = 7, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 3; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7651
// k in {1, 10, 19, 28, 37} with m = 3, n = 4, ks = 3 and a_offset = 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.ks(3);
    tester.a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7668
// k in {1, 10, 19, 28, 37} crossed with zero_index in 0..2;
// m = 3, n = 4, ks = 3, a_offset = 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; zero_idx++) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(kc);
      tester.ks(3);
      tester.a_offset(127);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7688
// Single case: m = 3, n = 4, k = 8 with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7702
// Single case: m = 3, n = 4, k = 8 with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7716
// Single case: m = 3, n = 4, k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7730
// k in {1, 10, 19, 28, 37} with m = 3, n = 4 and a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7746
// k in {1, 10, 19, 28, 37} with m = 3, n = 4 and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7762
// k in {1, 10, 19, 28, 37} with m = 3, n = 4 and both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7779 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7780
7781
7782 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7795
// Single case: m = 4, n = 4, k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7809
// n in 1..4 crossed with m in 1..4 at k = 8; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7827
// m in 1..4 with n = 4 and k = 8; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7843
// n in 1..4 with m = 4 and k = 8; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; nc++) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7859
// k in 1..7 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; kc++) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7874
// k in 1..7, n in 1..4, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7894
// k in 9..15 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; kc++) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7909
// k in 9..15, n in 1..4, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; kc++) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7929
// k in 16..80 in steps of 8 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7944
// k in 16..80 in steps of 8, n in 1..4, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7964
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}; m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7981
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7999
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8019
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8036
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8054
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8074
// k in {1, 10, 19, 28, 37} with m = 4, n = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8090
// k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; ks = 3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8111
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}; m = 4, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8129
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 4, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8147
// k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; cm_stride = 7, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8168
// k in {1, 10, 19, 28, 37} with m = 4, n = 4, ks = 3 and a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8185
// k in {1, 10, 19, 28, 37} crossed with zero_index in 0..3;
// m = 4, n = 4, ks = 3, a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; zero_idx++) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(kc);
      tester.ks(3);
      tester.a_offset(163);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8205
// Single case: m = 4, n = 4, k = 8 with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
8219
// Single case: m = 4, n = 4, k = 8 with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
8233
// Single case: m = 4, n = 4, k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
8247
// k in {1, 10, 19, 28, 37} with m = 4, n = 4 and a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8263
// k in {1, 10, 19, 28, 37} with m = 4, n = 4 and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8279
// k in {1, 10, 19, 28, 37} with m = 4, n = 4 and both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8297
8298
8299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
8312
// Single case: m = 4, n = 4, k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
8326
// n in 1..4 crossed with m in 1..4 at k = 8; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8344
// m in 1..4 with n = 4 and k = 8; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8360
// n in 1..4 with m = 4 and k = 8; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; nc++) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8376
// k in 1..7 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; kc++) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8391
// k in 1..7, n in 1..4, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8411
// k in 9..15 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; kc++) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8426
// k in 9..15, n in 1..4, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; kc++) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8446
// k in 16..80 in steps of 8 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8461
// k in 16..80 in steps of 8, n in 1..4, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8481
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}; m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8498
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8516
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8536
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8553
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8571
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..4; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8591
// k in {1, 10, 19, 28, 37} with m = 4, n = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8607
// k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; ks = 3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8628
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}; m = 4, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8646
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 4, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8664
// strided_cm_subtile: cm_stride(7), one iteration per case; every (m, n) in
// [1..4] x [1..4] for k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8685
// a_offset: m = 4, n = 4, ks(3), a_offset(163); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_len)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8702
// zero: ks(3), a_offset(163); zero_index sweeps 0..3 for each k in
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_len)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8722
// qmin: single 4x4 run at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8736
// qmax: single 4x4 run at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8750
// strided_cm: single 4x4 run at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8764
// no_a_zero_point: m = 4, n = 4, a_zero_point(0); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_len)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8780
// no_b_zero_point: m = 4, n = 4, b_zero_point(0); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_len)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8796
// no_zero_point: m = 4, n = 4, a_zero_point(0) and b_zero_point(0);
// k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_len)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8813 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8814
8815
8816 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m = mr = 3, n = nr = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8829
// strided_cn: single 3x4 run at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
8843
// k_eq_8_subtile: k = 8, one iteration per case; n in 1..4 (outer), m in 1..3 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
8861
// k_eq_8_subtile_m: k = 8, n = 4, one iteration per case; m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8877
// k_eq_8_subtile_n: k = 8, m = 3, one iteration per case; n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8893
// k_lt_8: m = 3, n = 4; k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len < 8; ++k_len) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8908
// k_lt_8_subtile: one iteration per case; k in 1..7, n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len < 8; ++k_len) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8928
// k_gt_8: m = 3, n = 4; k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8943
// k_gt_8_subtile: one iteration per case; k in 9..15, n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8963
// k_div_8: m = 3, n = 4; k in 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
8978
// k_div_8_subtile: one iteration per case; k in 16, 24, ..., 80,
// n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
8998
// n_gt_4: m = 3; n in 5..7 (above nr = 4), k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9015
// n_gt_4_strided_cn: m = 3, cn_stride(7); n in 5..7, k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9033
// n_gt_4_subtile: one iteration per case; n in 5..7, k in 1, 10, 19, 28, 37,
// m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9053
// n_div_4: m = 3; n in {8, 12} (multiples of nr = 4), k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9070
// n_div_4_strided_cn: m = 3, cn_stride(7); n in {8, 12}, k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9088
// n_div_4_subtile: one iteration per case; n in {8, 12},
// k in 1, 10, 19, 28, 37, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9108
// small_kernel: m = mr = 3, n = nr = 4, ks(3); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9124
// small_kernel_subtile: ks(3), one iteration per case;
// k in 1, 10, 19, 28, 37, n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9145
// n_gt_4_small_kernel: ks(3), m = 3; n in 5..7, k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9163
// n_div_4_small_kernel: ks(3), m = 3; n in {8, 12}, k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9181
// strided_cm_subtile: cm_stride(7), one iteration per case;
// k in 1, 10, 19, 28, 37, n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9202
// a_offset: m = 3, n = 4, ks(3), a_offset(127); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9219
// zero: ks(3), a_offset(127); zero_index sweeps 0..2 for each k in
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_len)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_row)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9239
// qmin: single 3x4 run at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9253
// qmax: single 3x4 run at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9267
// strided_cm: single 3x4 run at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9281
// no_a_zero_point: m = 3, n = 4, a_zero_point(0); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9297
// no_b_zero_point: m = 3, n = 4, b_zero_point(0); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9313
// no_zero_point: m = 3, n = 4, a_zero_point(0) and b_zero_point(0);
// k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9330 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9331
9332
9333 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m = mr = 3, n = nr = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9346
// strided_cn: single 3x4 run at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9360
// k_eq_8_subtile: k = 8, one iteration per case; n in 1..4 (outer), m in 1..3 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9378
// k_eq_8_subtile_m: k = 8, n = 4, one iteration per case; m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9394
// k_eq_8_subtile_n: k = 8, m = 3, one iteration per case; n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9410
// k_lt_8: m = 3, n = 4; k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 1; k_len < 8; ++k_len) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9425
// k_lt_8_subtile: one iteration per case; k in 1..7, n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 1; k_len < 8; ++k_len) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9445
// k_gt_8: m = 3, n = 4; k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9460
// k_gt_8_subtile: one iteration per case; k in 9..15, n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9480
// k_div_8: m = 3, n = 4; k in 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9495
// k_div_8_subtile: one iteration per case; k in 16, 24, ..., 80,
// n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9515
// n_gt_4: m = 3; n in 5..7 (above nr = 4), k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9532
// n_gt_4_strided_cn: m = 3, cn_stride(7); n in 5..7, k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9550
// n_gt_4_subtile: one iteration per case; n in 5..7, k in 1, 10, 19, 28, 37,
// m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9570
// n_div_4: m = 3; n in {8, 12} (multiples of nr = 4), k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9587
// n_div_4_strided_cn: m = 3, cn_stride(7); n in {8, 12}, k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9605
// n_div_4_subtile: one iteration per case; n in {8, 12},
// k in 1, 10, 19, 28, 37, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9625
// small_kernel: m = mr = 3, n = nr = 4, ks(3); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9641
// small_kernel_subtile: ks(3), one iteration per case;
// k in 1, 10, 19, 28, 37, n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9662
// n_gt_4_small_kernel: ks(3), m = 3; n in 5..7, k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9680
// n_div_4_small_kernel: ks(3), m = 3; n in {8, 12}, k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(k_len)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9698
// strided_cm_subtile: cm_stride(7), one iteration per case;
// k in 1, 10, 19, 28, 37, n in 1..4, m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(k_len)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9719
// a_offset: m = 3, n = 4, ks(3), a_offset(127); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_len)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9736
// zero: ks(3), a_offset(127); zero_index sweeps 0..2 for each k in
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_len)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_row)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9756
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9770
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9784
// Full 3x4 tile at k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9798
// Full 3x4 tile with a_zero_point forced to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9814
// Full 3x4 tile with b_zero_point forced to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9830
// Full 3x4 tile with both a_zero_point and b_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9847 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9848
9849
9850 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile at k = 8; no other tester options are set.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9863
// Full 4x4 tile at k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9877
// Every (m, n) sub-tile up to 4x4 at k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9895
// Varies m over 1..4 with n = 4 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9911
// Varies n over 1..4 with m = 4 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9927
// Full 4x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9942
// Every (m, n) sub-tile up to 4x4 for every k in 1..7; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9962
// Full 4x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9977
// Every (m, n) sub-tile up to 4x4 for every k in 9..15; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9997
// Full 4x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10012
// Every (m, n) sub-tile up to 4x4 for k = 16, 24, ..., 80; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10032
// m = 4 with n in 5..7, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10049
// m = 4 with n in 5..7 and cn_stride = 7, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10067
// n in 5..7 crossed with m in 1..4, for k in {1, 10, 19, 28, 37};
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10087
// m = 4 with n in {8, 12}, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10104
// m = 4 with n in {8, 12} and cn_stride = 7, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10122
// n in {8, 12} crossed with m in 1..4, for k in {1, 10, 19, 28, 37};
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10142
// Full 4x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10158
// Every (m, n) sub-tile up to 4x4 with ks = 3, for k in {1, 10, 19, 28, 37};
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10179
// m = 4 with n in 5..7 and ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10197
// m = 4 with n in {8, 12} and ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10215
// Every (m, n) sub-tile up to 4x4 with cm_stride = 7, for k in {1, 10, 19, 28, 37};
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10236
// Full 4x4 tile with ks = 3 and a_offset = 163, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10253
// Same setup as the a_offset test (ks = 3, a_offset = 163), additionally
// trying each zero_index in [0, 4) for every k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10273
// Full 4x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10287
// Full 4x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10301
// Full 4x4 tile at k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10315
// Full 4x4 tile with a_zero_point forced to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10331
// Full 4x4 tile with b_zero_point forced to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10347
// Full 4x4 tile with both a_zero_point and b_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10364 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10365
10366
10367 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k = 8; no other tester options are set.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10380
// Full 1x4 tile at k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10394
// Every (m, n) sub-tile up to 1x4 at k = 8; one iteration per configuration.
// The m loop has a single trip because mr is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10412
// Varies m over 1..1 (a single value, since mr is 1) with n = 4 and k = 8;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10428
// Varies n over 1..4 with m = 1 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10444
// Full 1x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10459
// Every (m, n) sub-tile up to 1x4 for every k in 1..7; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10479
// Full 1x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10494
// Every (m, n) sub-tile up to 1x4 for every k in 9..15; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10514
// Full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10529
// Every (m, n) sub-tile up to 1x4 for k = 16, 24, ..., 80; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10549
// m = 1 with n in 5..7, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10566
// m = 1 with n in 5..7 and cn_stride = 7, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10584
// n in 5..7 crossed with m in 1..1, for k in {1, 10, 19, 28, 37};
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10604
// m = 1 with n in {8, 12}, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10621
// m = 1 with n in {8, 12} and cn_stride = 7, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10639
// n in {8, 12} crossed with m in 1..1, for k in {1, 10, 19, 28, 37};
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10659
// Full 1x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10675
// Every (m, n) sub-tile up to 1x4 with ks = 3, for k in {1, 10, 19, 28, 37};
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10696
// m = 1 with n in 5..7 and ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10714
// m = 1 with n in {8, 12} and ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10732
// Every (m, n) sub-tile up to 1x4 with cm_stride = 7, for k in {1, 10, 19, 28, 37};
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10753
// Full 1x4 tile with ks = 3 and a_offset = 43, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_dim)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10770
// Same setup as the a_offset test (ks = 3, a_offset = 43), additionally
// trying each zero_index in [0, 1) for every k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10790
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10804
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10818
// 1x4 tile, k = 8, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
10832
// 1x4 tile with a_zero_point = 0; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10848
// 1x4 tile with b_zero_point = 0; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10864
// 1x4 tile with both a_zero_point and b_zero_point = 0; k takes the values
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10881 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10882
10883
10884 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
10897
// Full 2x4 tile, k = 8, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
10911
// k = 8; every (m, n) with m in [1, 2] and n in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
10929
// k = 8, n = 4; m swept over [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10945
// k = 8, m = 2; n swept over [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10961
// Full 2x4 tile; k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
10976
// k swept over 1..7; every (m, n) with m in [1, 2] and n in [1, 4], one
// iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
10996
// Full 2x4 tile; k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11011
// k swept over 9..15; every (m, n) with m in [1, 2] and n in [1, 4], one
// iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11031
// Full 2x4 tile; k takes the multiples of 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11046
// k takes the multiples of 8 from 16 through 80; every (m, n) with m in
// [1, 2] and n in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11066
// m = 2; n swept over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11083
// m = 2, cn_stride = 7; n swept over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11101
// n swept over 5..7, k over 1, 10, 19, 28, 37, and m over [1, 2]; one
// iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11121
// m = 2; n takes the values 8 and 12, k the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11138
// m = 2, cn_stride = 7; n takes the values 8 and 12, k the values
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11156
// n takes the values 8 and 12, k the values 1, 10, 19, 28, 37, and m is swept
// over [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11176
// Full 2x4 tile with ks = 3; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11192
// ks = 3; k takes the values 1, 10, 19, 28, 37, with every (m, n) for m in
// [1, 2] and n in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11213
// m = 2, ks = 3; n swept over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11231
// m = 2, ks = 3; n takes the values 8 and 12, k the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11249
// cm_stride = 7; k takes the values 1, 10, 19, 28, 37, with every (m, n) for
// m in [1, 2] and n in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11270
// Full 2x4 tile, ks = 3, a_offset = 83; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11287
// Full 2x4 tile, ks = 3, a_offset = 83; zero_index is swept over 0..1 for each
// k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11307
// Full 2x4 tile, k = 8, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11321
// Full 2x4 tile, k = 8, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11335
// Full 2x4 tile, k = 8, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11349
// Full 2x4 tile with a_zero_point = 0; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11365
// Full 2x4 tile with b_zero_point = 0; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11381
// Full 2x4 tile with both a_zero_point and b_zero_point = 0; k takes the
// values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11398 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11399
11400
11401 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11414
// Full 3x4 tile, k = 8, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11428
// k = 8; every (m, n) with m in [1, 3] and n in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11446
// k = 8, n = 4; m swept over [1, 3], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11462
// k = 8, m = 3; n swept over [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11478
// Full 3x4 tile; k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11493
// k swept over 1..7; every (m, n) with m in [1, 3] and n in [1, 4], one
// iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11513
// Full 3x4 tile; k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11528
// k swept over 9..15; every (m, n) with m in [1, 3] and n in [1, 4], one
// iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11548
// Full 3x4 tile; k takes the multiples of 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11563
// k takes the multiples of 8 from 16 through 80; every (m, n) with m in
// [1, 3] and n in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11583
// m = 3; n swept over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11600
// m = 3, cn_stride = 7; n swept over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11618
// n swept over 5..7, k over 1, 10, 19, 28, 37, and m over [1, 3]; one
// iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11638
// m = 3; n takes the values 8 and 12, k the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11655
// m = 3, cn_stride = 7; n takes the values 8 and 12, k the values
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11673
// n takes the values 8 and 12, k the values 1, 10, 19, 28, 37, and m is swept
// over [1, 3]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11693
// Full 3x4 tile with ks = 3; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11709
// ks = 3; k takes the values 1, 10, 19, 28, 37, with every (m, n) for m in
// [1, 3] and n in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11730
// m = 3, ks = 3; n swept over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11748
// m = 3, ks = 3; n takes the values 8 and 12, k the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11766
// cm_stride = 7; k takes the values 1, 10, 19, 28, 37, with every (m, n) for
// m in [1, 3] and n in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
11787
// Full 3x4 tile, ks = 3, a_offset = 127; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11804
// Full 3x4 tile, ks = 3, a_offset = 127; zero_index is swept over 0..2 for
// each k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11824
// Full 3x4 tile, k = 8, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11838
// Full 3x4 tile, k = 8, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11852
// Full 3x4 tile, k = 8, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11866
// Full 3x4 tile with a_zero_point = 0; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11882
// 3x4c2 AVX ld128 IGEMM: k in {1, 10, 19, 28, 37} on a 3x4 problem with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11898
// 3x4c2 AVX ld128 IGEMM: k in {1, 10, 19, 28, 37} on a 3x4 problem with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11915 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11916
11917
11918 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 XOP ld128 IGEMM: m = mr = 4, n = nr = 4, k = 8, no further tester options set.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11931
// 4x4c2 XOP ld128 IGEMM: m = 4, n = 4, k = 8, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
11945
// 4x4c2 XOP ld128 IGEMM: every (n, m) in 1..4 x 1..4 at k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
11963
// 4x4c2 XOP ld128 IGEMM: m in 1..4 with n = 4 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11979
// 4x4c2 XOP ld128 IGEMM: n in 1..4 with m = 4 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
11995
// 4x4c2 XOP ld128 IGEMM: k in 1..7 on a 4x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12010
// 4x4c2 XOP ld128 IGEMM: k in 1..7, and for each k every (n, m) in 1..4 x 1..4,
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12030
// 4x4c2 XOP ld128 IGEMM: k in 9..15 on a 4x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12045
// 4x4c2 XOP ld128 IGEMM: k in 9..15, and for each k every (n, m) in 1..4 x 1..4,
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12065
// 4x4c2 XOP ld128 IGEMM: k = 16, 24, ..., 80 (step 8) on a 4x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12080
// 4x4c2 XOP ld128 IGEMM: k = 16, 24, ..., 80, and for each k every (n, m) in
// 1..4 x 1..4, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12100
// 4x4c2 XOP ld128 IGEMM: n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12117
// 4x4c2 XOP ld128 IGEMM: n in 5..7, k in {1, 10, 19, 28, 37}, m = 4, cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12135
// 4x4c2 XOP ld128 IGEMM: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..4,
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12155
// 4x4c2 XOP ld128 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12172
// 4x4c2 XOP ld128 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4, cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12190
// 4x4c2 XOP ld128 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..4,
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12210
// 4x4c2 XOP ld128 IGEMM: k in {1, 10, 19, 28, 37} on a 4x4 problem with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12226
// 4x4c2 XOP ld128 IGEMM: k in {1, 10, 19, 28, 37}, every (n, m) in 1..4 x 1..4,
// with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12247
// 4x4c2 XOP ld128 IGEMM: n in 5..7, k in {1, 10, 19, 28, 37}, m = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12265
// 4x4c2 XOP ld128 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12283
// 4x4c2 XOP ld128 IGEMM: k in {1, 10, 19, 28, 37}, every (n, m) in 1..4 x 1..4,
// with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12304
// 4x4c2 XOP ld128 IGEMM: k in {1, 10, 19, 28, 37} on a 4x4 problem with ks(3)
// and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12321
// 4x4c2 XOP ld128 IGEMM: k in {1, 10, 19, 28, 37} and zero_index in 0..3 on a
// 4x4 problem with ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12341
// 4x4c2 XOP ld128 IGEMM: m = 4, n = 4, k = 8, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
12355
// 4x4c2 XOP ld128 IGEMM: m = 4, n = 4, k = 8, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
12369
// 4x4c2 XOP ld128 IGEMM: m = 4, n = 4, k = 8, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
12383
// 4x4c2 XOP ld128 IGEMM: k in {1, 10, 19, 28, 37} on a 4x4 problem with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12399
// 4x4c2 XOP ld128 IGEMM: k in {1, 10, 19, 28, 37} on a 4x4 problem with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12415
// 4x4c2 XOP ld128 IGEMM: k in {1, 10, 19, 28, 37} on a 4x4 problem with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12432 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12433
12434
12435 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2s4 SSE4.1 ld64 IGEMM: m = mr = 2, n = nr = 4, k = 8, no further tester options set.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
12448
// 2x4c2s4 SSE4.1 ld64 IGEMM: m = 2, n = 4, k = 8, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
12462
// 2x4c2s4 SSE4.1 ld64 IGEMM: every (n, m) in 1..4 x 1..2 at k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12480
// 2x4c2s4 SSE4.1 ld64 IGEMM: m in 1..2 with n = 4 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12496
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in 1..4 with m = 2 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12512
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in 1..7 on a 2x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12527
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in 1..7, and for each k every (n, m) in
// 1..4 x 1..2, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12547
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in 9..15 on a 2x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12562
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in 9..15, and for each k every (n, m) in
// 1..4 x 1..2, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12582
// 2x4c2s4 SSE4.1 ld64 IGEMM: k = 16, 24, ..., 80 (step 8) on a 2x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12597
// 2x4c2s4 SSE4.1 ld64 IGEMM: k = 16, 24, ..., 80, and for each k every (n, m) in
// 1..4 x 1..2, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12617
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12634
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in 5..7, k in {1, 10, 19, 28, 37}, m = 2,
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12652
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2,
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12672
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12689
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 2,
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12707
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2,
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12727
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in {1, 10, 19, 28, 37} on a 2x4 problem with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12743
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in {1, 10, 19, 28, 37}, every (n, m) in
// 1..4 x 1..2, with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12764
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in 5..7, k in {1, 10, 19, 28, 37}, m = 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12782
// 2x4c2s4 SSE4.1 ld64 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12800
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in {1, 10, 19, 28, 37}, every (n, m) in
// 1..4 x 1..2, with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
12821
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in {1, 10, 19, 28, 37} on a 2x4 problem with ks(3)
// and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12838
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in {1, 10, 19, 28, 37} and zero_index in 0..1 on a
// 2x4 problem with ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
12858
// 2x4c2s4 SSE4.1 ld64 IGEMM: m = 2, n = 4, k = 8, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
12872
// 2x4c2s4 SSE4.1 ld64 IGEMM: m = 2, n = 4, k = 8, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
12886
// 2x4c2s4 SSE4.1 ld64 IGEMM: m = 2, n = 4, k = 8, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
12900
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in {1, 10, 19, 28, 37} on a 2x4 problem with
// a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12916
// 2x4c2s4 SSE4.1 ld64 IGEMM: k in {1, 10, 19, 28, 37} on a 2x4 problem with
// b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
12932
// Drives the 2x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=2, n=4, a_zero_point(0) and b_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12949 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12950
12951
12952 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12965
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12979
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with k=8 and iterations(1),
// for every n in 1..4 (outer loop) and m in 1..3 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12997
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with n=4, k=8 and iterations(1), for every m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13013
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, k=8 and iterations(1), for every n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13029
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13044
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for every k in 1..7, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13064
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13079
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for every k in 9..15, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13099
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13114
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for k = 16, 24, ..., 80, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13134
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3,
// for every n in 5..7 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13151
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3 and cn_stride(7),
// for every n in 5..7 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13169
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13189
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3,
// for n = 8, 12 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13206
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3 and cn_stride(7),
// for n = 8, 12 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13224
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for n = 8, 12, k = 1, 10, 19, 28, 37 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13244
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13260
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with ks(3) and iterations(1),
// for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13281
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3 and ks(3),
// for every n in 5..7 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13299
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3 and ks(3),
// for n = 8, 12 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13317
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with cm_stride(7) and iterations(1),
// for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13338
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, ks(3) and a_offset(127),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13355
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, ks(3) and a_offset(127),
// for k = 1, 10, 19, 28, 37 (outer loop) and zero_index 0..2 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13375
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13389
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13403
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13417
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4 and a_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13433
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4 and b_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13449
// Drives the 3x4c2s4__sse2_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, a_zero_point(0) and b_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13466 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13467
13468
13469 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13482
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13496
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with k=8 and iterations(1),
// for every n in 1..4 (outer loop) and m in 1..3 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13514
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with n=4, k=8 and iterations(1), for every m in 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13530
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, k=8 and iterations(1), for every n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13546
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13561
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for every k in 1..7, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13581
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13596
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for every k in 9..15, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13616
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13631
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for k = 16, 24, ..., 80, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13651
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3,
// for every n in 5..7 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13668
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3 and cn_stride(7),
// for every n in 5..7 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13686
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13706
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3,
// for n = 8, 12 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13723
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3 and cn_stride(7),
// for n = 8, 12 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13741
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with iterations(1),
// for n = 8, 12, k = 1, 10, 19, 28, 37 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13761
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13777
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with ks(3) and iterations(1),
// for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13798
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3 and ks(3),
// for every n in 5..7 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13816
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3 and ks(3),
// for n = 8, 12 (outer loop) and k = 1, 10, 19, 28, 37 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13834
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with cm_stride(7) and iterations(1),
// for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13855
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, ks(3) and a_offset(127),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13872
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, ks(3) and a_offset(127),
// for k = 1, 10, 19, 28, 37 (outer loop) and zero_index 0..2 (inner loop).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13892
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13906
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13920
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester once with m=3, n=4, k=8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13934
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4 and a_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13950
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4 and b_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13966
// Drives the 3x4c2s4__sse41_ld64 QU8 IGEMM micro-kernel through
// GemmMicrokernelTester with m=3, n=4, a_zero_point(0) and b_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13983 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13984
13985
13986 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=4, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
13999
// Single run at m=4, n=4, k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14013
// Runs every (n, m) pair with n in 1..4 and m in 1..4 at k=8, iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14031
// Runs m in 1..4 at n=4, k=8, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14047
// Runs n in 1..4 at m=4, k=8, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14063
// Runs k in 1..7 at m=4, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14078
// Runs k in 1..7 crossed with n in 1..4 and m in 1..4, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14098
// Runs k in 9..15 at m=4, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14113
// Runs k in 9..15 crossed with n in 1..4 and m in 1..4, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14133
// Runs k in 16..80 in steps of 8 at m=4, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14148
// Runs k in 16..80 (step 8) crossed with n in 1..4 and m in 1..4,
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14168
// Runs n in 5..7 crossed with k in {1, 10, 19, 28, 37} at m=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14185
// Runs n in 5..7 crossed with k in {1, 10, 19, 28, 37} at m=4 with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14203
// Runs n in 5..7 crossed with k in {1, 10, 19, 28, 37} and m in 1..4,
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14223
// Runs n in {8, 12} crossed with k in {1, 10, 19, 28, 37} at m=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14240
// Runs n in {8, 12} crossed with k in {1, 10, 19, 28, 37} at m=4 with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14258
// Runs n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in 1..4,
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14278
// Runs k in {1, 10, 19, 28, 37} at m=4, n=4 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14294
// Runs k in {1, 10, 19, 28, 37} crossed with n in 1..4 and m in 1..4 with
// ks set to 3 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14315
// Runs n in 5..7 crossed with k in {1, 10, 19, 28, 37} at m=4 with ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14333
// Runs n in {8, 12} crossed with k in {1, 10, 19, 28, 37} at m=4 with ks
// set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14351
// Runs k in {1, 10, 19, 28, 37} crossed with n in 1..4 and m in 1..4 with
// cm_stride set to 7 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14372
// Runs k in {1, 10, 19, 28, 37} at m=4, n=4 with ks set to 3 and a_offset
// set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14389
// Runs k in {1, 10, 19, 28, 37} crossed with zero_index in 0..3 at m=4,
// n=4 with ks set to 3 and a_offset set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14409
// Single run at m=4, n=4, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14423
// Single run at m=4, n=4, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14437
// Single run at m=4, n=4, k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14451
// Runs k in {1, 10, 19, 28, 37} at m=4, n=4 with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14467
// Runs k in {1, 10, 19, 28, 37} at m=4, n=4 with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14483
// Runs k in {1, 10, 19, 28, 37} at m=4, n=4 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14500 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14501
14502
14503 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14516
// Single run at m=1, n=4, k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14530
// Runs n in 1..4 at k=8, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // The m range 1..1 contains a single value, so no inner loop is needed.
  const uint32_t mc = 1;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(mc).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14548
// Single run at m=1 (the only value in the m range 1..1), n=4, k=8,
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  const uint32_t mc = 1;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(mc).n(4).k(8)
    .iterations(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14564
// Runs n in 1..4 at m=1, k=8, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14580
// Runs k in 1..7 at m=1, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14595
// Runs k in 1..7 crossed with n in 1..4, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // The m range 1..1 contains a single value, so no inner loop is needed.
  const uint32_t mc = 1;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(kc)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14615
// Runs k in 9..15 at m=1, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14630
// Runs k in 9..15 crossed with n in 1..4, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // The m range 1..1 contains a single value, so no inner loop is needed.
  const uint32_t mc = 1;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(kc)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14650
// Runs k in 16..80 in steps of 8 at m=1, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14665
// Runs k in 16..80 (step 8) crossed with n in 1..4, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // The m range 1..1 contains a single value, so no inner loop is needed.
  const uint32_t mc = 1;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(kc)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14685
// Runs n in 5..7 crossed with k in {1, 10, 19, 28, 37} at m=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14702
// Runs n in 5..7 crossed with k in {1, 10, 19, 28, 37} at m=1 with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14720
// Runs n in 5..7 crossed with k in {1, 10, 19, 28, 37}, iterations set
// to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // The m range 1..1 contains a single value, so no inner loop is needed.
  const uint32_t mc = 1;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(kc)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14740
// Runs n in {8, 12} crossed with k in {1, 10, 19, 28, 37} at m=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14757
// Runs n in {8, 12} crossed with k in {1, 10, 19, 28, 37} at m=1 with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14775
// Runs n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, iterations set
// to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // The m range 1..1 contains a single value, so no inner loop is needed.
  const uint32_t mc = 1;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(kc)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14795
// Runs k in {1, 10, 19, 28, 37} at m=1, n=4 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14811
// Runs k in {1, 10, 19, 28, 37} crossed with n in 1..4 with ks set to 3
// and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  // The m range 1..1 contains a single value, so no inner loop is needed.
  const uint32_t mc = 1;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(kc)
        .ks(3)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14832
// Runs n in 5..7 crossed with k in {1, 10, 19, 28, 37} at m=1 with ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14850
// Runs n in {8, 12} crossed with k in {1, 10, 19, 28, 37} at m=1 with ks
// set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14868
// Runs k in {1, 10, 19, 28, 37} crossed with n in 1..4 with cm_stride set
// to 7 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // The m range 1..1 contains a single value, so no inner loop is needed.
  const uint32_t mc = 1;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(kc)
        .cm_stride(7)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14889
// Runs k in {1, 10, 19, 28, 37} at m=1, n=4 with ks set to 3 and a_offset
// set to 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14906
// Runs k in {1, 10, 19, 28, 37} at m=1, n=4 with ks set to 3, a_offset set
// to 43 and zero_index set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  // The zero_index range 0..0 contains a single value, so no inner loop is
  // needed.
  const uint32_t zero_row = 0;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .zero_index(zero_row)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14926
// Single run at m=1, n=4, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14940
// Single run at m=1, n=4, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14954
// Single run at m=1, n=4, k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14968
// Runs k in {1, 10, 19, 28, 37} at m=1, n=4 with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14984
// Runs k in {1, 10, 19, 28, 37} at m=1, n=4 with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
15000
// Runs k in {1, 10, 19, 28, 37} at m=1, n=4 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
15017 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15018
15019
15020 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with m=1, n=4, k=8; all other tester settings keep their defaults.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15033
// One run with m=1, n=4, k=8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15047
// k=8 with every (n, m) pair for n in [1, 4] and m in [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15065
// k=8, n=4, sweeping m over [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15081
// k=8, m=1, sweeping n over [1, 4]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15097
// Sweeps k over 1..7 with m=1, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15112
// Sweeps k over 1..7, n over [1, 4] and m over [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15132
// Sweeps k over 9..15 with m=1, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15147
// Sweeps k over 9..15, n over [1, 4] and m over [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15167
// Sweeps k over the multiples of 8 from 16 to 80 with m=1, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15182
// Sweeps k over multiples of 8 in [16, 80], n over [1, 4] and m over [1, 1]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15202
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15219
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15237
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over [1, 1]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15257
// Runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37 and m=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15274
// Runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37, m=1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15292
// Runs n = 8 and n = 12 with k over 1, 10, 19, 28, 37 and m over [1, 1]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15312
// Sweeps k over 1, 10, 19, 28, 37 with ks set to 3 (m=1, n=4).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15328
// ks=3 with k over 1, 10, 19, 28, 37, n over [1, 4] and m over [1, 1]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15349
// ks=3 with n over 5..7 and k over 1, 10, 19, 28, 37 (m=1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15367
// ks=3 with n = 8 and n = 12, each over k = 1, 10, 19, 28, 37 (m=1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15385
// cm_stride=7 with k over 1, 10, 19, 28, 37, n over [1, 4] and m over [1, 1]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15406
// Sweeps k over 1, 10, 19, 28, 37 with ks=3 and a_offset=43 (m=1, n=4).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15423
// ks=3, a_offset=43, with zero_index taking each value in [0, 1) for k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(4).k(depth);
      tester.ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15443
// One run with m=1, n=4, k=8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15457
// One run with m=1, n=4, k=8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15471
// One run with m=1, n=4, k=8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15485
// Sweeps k over 1, 10, 19, 28, 37 with a_zero_point set to 0 (m=1, n=4).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15501
// Sweeps k over 1, 10, 19, 28, 37 with b_zero_point set to 0 (m=1, n=4).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15517
// Sweeps k over 1, 10, 19, 28, 37 with both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15534 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15535
15536
15537 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with m=2, n=4, k=8; all other tester settings keep their defaults.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15550
// One run with m=2, n=4, k=8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15564
// k=8 with every (n, m) pair for n in [1, 4] and m in [1, 2]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15582
// k=8, n=4, sweeping m over [1, 2]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15598
// k=8, m=2, sweeping n over [1, 4]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15614
// Sweeps k over 1..7 with m=2, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15629
// Sweeps k over 1..7, n over [1, 4] and m over [1, 2]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15649
// Sweeps k over 9..15 with m=2, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15664
// Sweeps k over 9..15, n over [1, 4] and m over [1, 2]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15684
// Sweeps k over the multiples of 8 from 16 to 80 with m=2, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15699
// Sweeps k over multiples of 8 in [16, 80], n over [1, 4] and m over [1, 2]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15719
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15736
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=2 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15754
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over [1, 2]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15774
// Runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37 and m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15791
// Runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37, m=2 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15809
// Runs n = 8 and n = 12 with k over 1, 10, 19, 28, 37 and m over [1, 2]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15829
// Sweeps k over 1, 10, 19, 28, 37 with ks set to 3 (m=2, n=4).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15845
// ks=3 with k over 1, 10, 19, 28, 37, n over [1, 4] and m over [1, 2]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15866
// ks=3 with n over 5..7 and k over 1, 10, 19, 28, 37 (m=2).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15884
// ks=3 with n = 8 and n = 12, each over k = 1, 10, 19, 28, 37 (m=2).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15902
// cm_stride=7 with k over 1, 10, 19, 28, 37, n over [1, 4] and m over [1, 2]; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15923
// Sweeps k over 1, 10, 19, 28, 37 with ks=3 and a_offset=83 (m=2, n=4).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15940
// ks=3, a_offset=83, with zero_index taking 0 and 1 for each k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(depth);
      tester.ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15960
// One run with m=2, n=4, k=8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15974
// One run with m=2, n=4, k=8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15988
// One run with m=2, n=4, k=8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16002
// Sweeps k over 1, 10, 19, 28, 37 with a_zero_point set to 0 (m=2, n=4).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16018
// Sweeps k over 1, 10, 19, 28, 37 with b_zero_point set to 0 (m=2, n=4).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16034
// Sweeps k over 1, 10, 19, 28, 37 with both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16051 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16052
16053
16054 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with m=2, n=4, k=8; all other tester settings keep their defaults.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16067
// One run with m=2, n=4, k=8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16081
// Runs the 2x4c2s4 XOP ld64 micro-kernel at k=8 for every (n, m) pair with
// n in [1, 4] and m in [1, 2], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16099
// Runs the 2x4c2s4 XOP ld64 micro-kernel at n=4, k=8 for m = 1 and m = 2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16115
// Runs the 2x4c2s4 XOP ld64 micro-kernel at m=2, k=8 for n = 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16131
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16146
// Runs the 2x4c2s4 XOP ld64 micro-kernel for every k in [1, 7], n in [1, 4]
// and m in [1, 2], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16166
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16181
// Runs the 2x4c2s4 XOP ld64 micro-kernel for every k in [9, 15], n in [1, 4]
// and m in [1, 2], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16201
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4 for k = 16, 24, ..., 80
// (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16216
// Runs the 2x4c2s4 XOP ld64 micro-kernel for k = 16, 24, ..., 80 combined with
// every n in [1, 4] and m in [1, 2], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16236
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2 for n = 5, 6, 7 combined
// with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16253
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2 and cn_stride set to 7 for
// n = 5, 6, 7 combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16271
// Runs the 2x4c2s4 XOP ld64 micro-kernel for n = 5, 6, 7, k = 1, 10, 19, 28, 37
// and m in [1, 2], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16291
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2 for n = 8 and n = 12
// combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16308
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2 and cn_stride set to 7 for
// n = 8 and n = 12 combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16326
// Runs the 2x4c2s4 XOP ld64 micro-kernel for n = 8 and n = 12,
// k = 1, 10, 19, 28, 37 and m in [1, 2], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16346
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4 and ks set to 3 for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16362
// Runs the 2x4c2s4 XOP ld64 micro-kernel with ks set to 3 for
// k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16383
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2 and ks set to 3 for
// n = 5, 6, 7 combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16401
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2 and ks set to 3 for
// n = 8 and n = 12 combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16419
// Runs the 2x4c2s4 XOP ld64 micro-kernel with cm_stride set to 7 for
// k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16440
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4, ks set to 3 and
// a_offset set to 83 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16457
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4, ks set to 3 and
// a_offset set to 83 for k = 1, 10, 19, 28, 37, passing each zero_index
// value in [0, 1].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16477
// Runs the 2x4c2s4 XOP ld64 micro-kernel once with m=2, n=4, k=8 and
// qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
16491
// Runs the 2x4c2s4 XOP ld64 micro-kernel once with m=2, n=4, k=8 and
// qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
16505
// Runs the 2x4c2s4 XOP ld64 micro-kernel once with m=2, n=4, k=8 and
// cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
16519
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4 for k = 1, 10, 19, 28, 37
// while a_zero_point is set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16535
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4 for k = 1, 10, 19, 28, 37
// while b_zero_point is set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16551
// Runs the 2x4c2s4 XOP ld64 micro-kernel with m=2, n=4 for k = 1, 10, 19, 28, 37
// while both a_zero_point and b_zero_point are set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16568 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16569
16570
16571 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4c2s4 AVX ld64 micro-kernel once with m=4, n=4 (matching mr and nr)
// and k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
16584
// Runs the 4x4c2s4 AVX ld64 micro-kernel once with m=4, n=4, k=8 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
16598
// Runs the 4x4c2s4 AVX ld64 micro-kernel at k=8 for every (n, m) pair with
// n in [1, 4] and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16616
// Runs the 4x4c2s4 AVX ld64 micro-kernel at n=4, k=8 for m = 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16632
// Runs the 4x4c2s4 AVX ld64 micro-kernel at m=4, k=8 for n = 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16648
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16663
// Runs the 4x4c2s4 AVX ld64 micro-kernel for every k in [1, 7], n in [1, 4]
// and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16683
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16698
// Runs the 4x4c2s4 AVX ld64 micro-kernel for every k in [9, 15], n in [1, 4]
// and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16718
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4 for k = 16, 24, ..., 80
// (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16733
// Runs the 4x4c2s4 AVX ld64 micro-kernel for k = 16, 24, ..., 80 combined with
// every n in [1, 4] and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16753
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4 for n = 5, 6, 7 combined
// with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16770
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4 and cn_stride set to 7 for
// n = 5, 6, 7 combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16788
// Runs the 4x4c2s4 AVX ld64 micro-kernel for n = 5, 6, 7, k = 1, 10, 19, 28, 37
// and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16808
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4 for n = 8 and n = 12
// combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16825
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4 and cn_stride set to 7 for
// n = 8 and n = 12 combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16843
// Runs the 4x4c2s4 AVX ld64 micro-kernel for n = 8 and n = 12,
// k = 1, 10, 19, 28, 37 and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16863
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4 and ks set to 3 for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16879
// Runs the 4x4c2s4 AVX ld64 micro-kernel with ks set to 3 for
// k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16900
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4 and ks set to 3 for
// n = 5, 6, 7 combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16918
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4 and ks set to 3 for
// n = 8 and n = 12 combined with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16936
// Runs the 4x4c2s4 AVX ld64 micro-kernel with cm_stride set to 7 for
// k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
16957
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4, ks set to 3 and
// a_offset set to 163 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
16974
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4, ks set to 3 and
// a_offset set to 163 for k = 1, 10, 19, 28, 37, passing each zero_index
// value in [0, 3].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
16994
// Runs the 4x4c2s4 AVX ld64 micro-kernel once with m=4, n=4, k=8 and
// qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
17008
// Runs the 4x4c2s4 AVX ld64 micro-kernel once with m=4, n=4, k=8 and
// qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
17022
// Runs the 4x4c2s4 AVX ld64 micro-kernel once with m=4, n=4, k=8 and
// cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
17036
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4 for k = 1, 10, 19, 28, 37
// while a_zero_point is set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
17052
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4 for k = 1, 10, 19, 28, 37
// while b_zero_point is set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
17068
// Runs the 4x4c2s4 AVX ld64 micro-kernel with m=4, n=4 for k = 1, 10, 19, 28, 37
// while both a_zero_point and b_zero_point are set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
17085 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17086
17087
17088 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4c2s4 XOP ld64 micro-kernel once with m=4, n=4 (matching mr and nr)
// and k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
17101
// Runs the 4x4c2s4 XOP ld64 micro-kernel once with m=4, n=4, k=8 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
17115
// Runs the 4x4c2s4 XOP ld64 micro-kernel at k=8 for every (n, m) pair with
// n in [1, 4] and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
17133
// Varies m over 1..4 with n = 4 and k = 8, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17149
// Varies n over 1..4 with m = 4 and k = 8, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17165
// Sweeps k over 1..7 on a full 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17180
// For each k in 1..7, covers every (m, n) pair with m in 1..4 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17200
// Sweeps k over 9..15 on a full 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17215
// For each k in 9..15, covers every (m, n) pair with m in 1..4 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17235
// Sweeps k over 16, 24, ..., 80 (step 8) on a full 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17250
// For each k in 16, 24, ..., 80, covers every (m, n) pair with m in 1..4 and
// n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17270
// For n in 5..7, sweeps k over 1, 10, 19, 28, 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17287
// For n in 5..7, sweeps k over 1, 10, 19, 28, 37 with m = 4 and
// cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17305
// For n in 5..7 and k in 1, 10, 19, 28, 37, varies m over 1..4, one iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17325
// For n in {8, 12}, sweeps k over 1, 10, 19, 28, 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17342
// For n in {8, 12}, sweeps k over 1, 10, 19, 28, 37 with m = 4 and
// cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17360
// For n in {8, 12} and k in 1, 10, 19, 28, 37, varies m over 1..4, one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17380
// Sweeps k over 1, 10, 19, 28, 37 on a full 4x4 tile with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17396
// For each k in 1, 10, 19, 28, 37, covers every (m, n) pair with m in 1..4
// and n in 1..4 using ks = 3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17417
// For n in 5..7, sweeps k over 1, 10, 19, 28, 37 with m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17435
// For n in {8, 12}, sweeps k over 1, 10, 19, 28, 37 with m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17453
// For each k in 1, 10, 19, 28, 37, covers every (m, n) pair with m in 1..4
// and n in 1..4 using cm_stride = 7, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17474
// Sweeps k over 1, 10, 19, 28, 37 on a full 4x4 tile with ks = 3 and
// a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17491
// For each k in 1, 10, 19, 28, 37, runs with zero_index set to 0..3 on a full
// 4x4 tile with ks = 3 and a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(4).k(k_val);
      tester.ks(3).a_offset(163).zero_index(z_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17511
// Single run on a full 4x4 tile with k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17525
// Single run on a full 4x4 tile with k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17539
// Single run on a full 4x4 tile with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17553
// Sweeps k over 1, 10, 19, 28, 37 on a full 4x4 tile with a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17569
// Sweeps k over 1, 10, 19, 28, 37 on a full 4x4 tile with b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17585
// Sweeps k over 1, 10, 19, 28, 37 on a full 4x4 tile with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17602 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17603
17604
17605 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a full 1x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17618
// Single run on a full 1x4 tile with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17632
// Covers every (m, n) pair with m = 1 and n in 1..4 at k = 8, one iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17650
// Loops m over the single value 1 with n = 4 and k = 8, one iteration per
// configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17666
// Varies n over 1..4 with m = 1 and k = 8, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17682
// Sweeps k over 1..7 on a full 1x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17697
// For each k in 1..7, covers every (m, n) pair with m = 1 and n in 1..4, one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17717
// Sweeps k over 9..15 on a full 1x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17732
// For each k in 9..15, covers every (m, n) pair with m = 1 and n in 1..4, one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17752
// Sweeps k over 16, 24, ..., 80 (step 8) on a full 1x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17767
// For each k in 16, 24, ..., 80, covers every (m, n) pair with m = 1 and
// n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17787
// For n in 5..7, sweeps k over 1, 10, 19, 28, 37 with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17804
// For n in 5..7, sweeps k over 1, 10, 19, 28, 37 with m = 1 and
// cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17822
// For n in 5..7 and k in 1, 10, 19, 28, 37, loops m over the single value 1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17842
// For n in {8, 12}, sweeps k over 1, 10, 19, 28, 37 with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17859
// For n in {8, 12}, sweeps k over 1, 10, 19, 28, 37 with m = 1 and
// cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17877
// For n in {8, 12} and k in 1, 10, 19, 28, 37, loops m over the single value
// 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17897
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x4 tile with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_val);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17913
// For each k in 1, 10, 19, 28, 37, covers every (m, n) pair with m = 1 and
// n in 1..4 using ks = 3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17934
// For n in 5..7, sweeps k over 1, 10, 19, 28, 37 with m = 1 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17952
// For n in {8, 12}, sweeps k over 1, 10, 19, 28, 37 with m = 1 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17970
// For each k in 1, 10, 19, 28, 37, covers every (m, n) pair with m = 1 and
// n in 1..4 using cm_stride = 7, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17991
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x4 tile with ks = 3 and
// a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_val);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18008
// For each k in 1, 10, 19, 28, 37, runs with zero_index = 0 (the only value
// the inner loop produces) on a full 1x4 tile with ks = 3 and a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t z_idx = 0; z_idx < 1; ++z_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(4).k(k_val);
      tester.ks(3).a_offset(43).zero_index(z_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18028
// Single run on a full 1x4 tile with k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18042
// Single run on a full 1x4 tile with k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18056
// Single run on a full 1x4 tile with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18070
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x4 tile with a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_val);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18086
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x4 tile with b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_val);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18102
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x4 tile with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_val);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18119 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18120
18121
18122 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a full 4x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18135
// Single run on a full 4x4 tile with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18149
// Covers every (m, n) pair with m in 1..4 and n in 1..4 at k = 8, one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18167
// Varies m over 1..4 with n = 4 and k = 8, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18183
// Sweeps n over [1, 4] with m = 4 and k = 8, using iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18199
// Sweeps k over [1, 7] with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18214
// For each k in [1, 7], runs every (n, m) pair with n, m in [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18234
// Sweeps k over [9, 15] with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18249
// For each k in [9, 15], runs every (n, m) pair with n, m in [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18269
// Sweeps k over 16, 24, ..., 80 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18284
// For each k in 16, 24, ..., 80, runs every (n, m) pair with n, m in [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18304
// For each n in [5, 7], sweeps k over 1, 10, ..., 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18321
// For each n in [5, 7], sweeps k over 1, 10, ..., 37 with m = 4 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18339
// For each n in [5, 7] and k in 1, 10, ..., 37, sweeps m over [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18359
// For n = 8 and n = 12, sweeps k over 1, 10, ..., 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18376
// For n = 8 and n = 12, sweeps k over 1, 10, ..., 37 with m = 4 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18394
// For n = 8 and n = 12 and k in 1, 10, ..., 37, sweeps m over [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18414
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18430
// For each k in 1, 10, ..., 37, runs every (n, m) pair with n, m in [1, 4]
// with ks set to 3, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18451
// For each n in [5, 7], sweeps k over 1, 10, ..., 37 with m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18469
// For n = 8 and n = 12, sweeps k over 1, 10, ..., 37 with m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18487
// For each k in 1, 10, ..., 37, runs every (n, m) pair with n, m in [1, 4]
// with cm_stride set to 7, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18508
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4, ks = 3 and a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18525
// For each k in 1, 10, ..., 37, sweeps zero_index over [0, 3] with m = 4,
// n = 4, ks = 3 and a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18545
// Single run with m = 4, n = 4, k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18559
// Single run with m = 4, n = 4, k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18573
// Single run with m = 4, n = 4, k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18587
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18603
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18619
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4 and both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18636 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18637
18638
18639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 4, n = 4 and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18652
// Single run with m = 4, n = 4, k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18666
// Runs every (n, m) pair with n in [1, 4] and m in [1, 4] at k = 8,
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18684
// Sweeps m over [1, 4] with n = 4 and k = 8, using iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18700
// Sweeps n over [1, 4] with m = 4 and k = 8, using iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18716
// Sweeps k over [1, 7] with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18731
// For each k in [1, 7], runs every (n, m) pair with n, m in [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18751
// Sweeps k over [9, 15] with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18766
// For each k in [9, 15], runs every (n, m) pair with n, m in [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18786
// Sweeps k over 16, 24, ..., 80 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18801
// For each k in 16, 24, ..., 80, runs every (n, m) pair with n, m in [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18821
// For each n in [5, 7], sweeps k over 1, 10, ..., 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18838
// For each n in [5, 7], sweeps k over 1, 10, ..., 37 with m = 4 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18856
// For each n in [5, 7] and k in 1, 10, ..., 37, sweeps m over [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18876
// For n = 8 and n = 12, sweeps k over 1, 10, ..., 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18893
// For n = 8 and n = 12, sweeps k over 1, 10, ..., 37 with m = 4 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18911
// For n = 8 and n = 12 and k in 1, 10, ..., 37, sweeps m over [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18931
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18947
// For each k in 1, 10, ..., 37, runs every (n, m) pair with n, m in [1, 4]
// with ks set to 3, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18968
// For each n in [5, 7], sweeps k over 1, 10, ..., 37 with m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18986
// For n = 8 and n = 12, sweeps k over 1, 10, ..., 37 with m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
19004
// For each k in 1, 10, ..., 37, runs every (n, m) pair with n, m in [1, 4]
// with cm_stride set to 7, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
19025
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4, ks = 3 and a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19042
// For each k in 1, 10, ..., 37, sweeps zero_index over [0, 3] with m = 4,
// n = 4, ks = 3 and a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
19062
// Single run with m = 4, n = 4, k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19076
// Single run with m = 4, n = 4, k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19090
// Single run with m = 4, n = 4, k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19104
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19120
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19136
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 4 and both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19153 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19154
19155
19156 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 4 and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19169
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19183
// Runs every (n, m) pair with n in [1, 4] and m in [1, 1] at k = 8,
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
19201
// Sweeps m over [1, 1] (a single value) with n = 4 and k = 8,
// using iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19217
// n swept over 1..4 with m = 1 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cur_n).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19233
// k swept over 1..7 at m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19248
// k in 1..7 combined with every n in 1..4 and m in 1..1, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19268
// k swept over 9..15 at m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19283
// k in 9..15 combined with every n in 1..4 and m in 1..1, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19303
// k swept over 16, 24, ..., 80 at m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19318
// k in 16, 24, ..., 80 combined with every n in 1..4 and m in 1..1, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19338
// n in 5..7 combined with k in {1, 10, 19, 28, 37} at m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19355
// n in 5..7 combined with k in {1, 10, 19, 28, 37} at m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19373
// n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..1, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19393
// n in {8, 12} combined with k in {1, 10, 19, 28, 37} at m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19410
// n in {8, 12} combined with k in {1, 10, 19, 28, 37} at m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19428
// n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..1, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19448
// k in {1, 10, 19, 28, 37} at m = 1, n = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19464
// k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..1, with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19485
// n in 5..7 combined with k in {1, 10, 19, 28, 37} at m = 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19503
// n in {8, 12} combined with k in {1, 10, 19, 28, 37} at m = 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19521
// k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..1, with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19542
// k in {1, 10, 19, 28, 37} at m = 1, n = 4, with ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19559
// k in {1, 10, 19, 28, 37} with zero_index swept over 0..0, using ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(cur_k)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19579
// Single configuration (m = 1, n = 4, k = 8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19593
// Single configuration (m = 1, n = 4, k = 8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19607
// Single configuration (m = 1, n = 4, k = 8) with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19621
// k in {1, 10, 19, 28, 37} at m = 1, n = 4, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19637
// k in {1, 10, 19, 28, 37} at m = 1, n = 4, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19653
// k in {1, 10, 19, 28, 37} at m = 1, n = 4, with a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19670 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19671
19672
19673 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m = 2, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19686
// Single configuration (m = 2, n = 4, k = 8) with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
19700
// Every (n, m) pair with n in 1..4 and m in 1..2 at k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19718
// m swept over 1..2 with n = 4 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19734
// n swept over 1..4 with m = 2 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cur_n).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19750
// k swept over 1..7 at m = 2, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(cur_k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19765
// k in 1..7 combined with every n in 1..4 and m in 1..2, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19785
// k swept over 9..15 at m = 2, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(cur_k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19800
// k in 9..15 combined with every n in 1..4 and m in 1..2, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19820
// k swept over 16, 24, ..., 80 at m = 2, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(cur_k)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19835
// k in 16, 24, ..., 80 combined with every n in 1..4 and m in 1..2, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19855
// n in 5..7 combined with k in {1, 10, 19, 28, 37} at m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cur_n).k(cur_k)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19872
// n in 5..7 combined with k in {1, 10, 19, 28, 37} at m = 2, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19890
// n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..2, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19910
// n in {8, 12} combined with k in {1, 10, 19, 28, 37} at m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cur_n).k(cur_k)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19927
// n in {8, 12} combined with k in {1, 10, 19, 28, 37} at m = 2, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
19945
// n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..2, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
19965
// k in {1, 10, 19, 28, 37} at m = 2, n = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(cur_k)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
19981
// k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..2, with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20002
// n in 5..7 combined with k in {1, 10, 19, 28, 37} at m = 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20020
// n in {8, 12} combined with k in {1, 10, 19, 28, 37} at m = 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20038
// k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..2, with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20059
// k in {1, 10, 19, 28, 37} at m = 2, n = 4, with ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(cur_k)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20076
// k in {1, 10, 19, 28, 37} with zero_index swept over 0..1, using ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(cur_k)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20096
// Single configuration (m = 2, n = 4, k = 8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20110
// Single configuration (m = 2, n = 4, k = 8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20124
// Single configuration (m = 2, n = 4, k = 8) with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20138
// k in {1, 10, 19, 28, 37} at m = 2, n = 4, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(cur_k)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20154
// k in {1, 10, 19, 28, 37} at m = 2, n = 4, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(cur_k)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20170
// k in {1, 10, 19, 28, 37} at m = 2, n = 4, with a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(cur_k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20187 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20188
20189
20190 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m = 3, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20203
// Single configuration (m = 3, n = 4, k = 8) with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20217
// Every (n, m) pair with n in 1..4 and m in 1..3 at k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20235
// m swept over 1..3 with n = 4 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20251
// n swept over 1..4 with m = 3 and k = 8, with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20267
// Sweeps k over 1..7 (all values below 8) with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20282
// For each k in 1..7, covers every (n, m) pair for n in 1..4 and m in 1..3,
// using a single tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20302
// Sweeps k over 9..15 with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20317
// For each k in 9..15, covers every (n, m) pair for n in 1..4 and m in 1..3,
// using a single tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20337
// Sweeps k over the multiples of 8 from 16 through 80 with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20352
// For each multiple of 8 from 16 through 80 used as k, covers every (n, m)
// pair for n in 1..4 and m in 1..3, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20372
// For n in 5..7 (above nr=4), runs k in {1, 10, 19, 28, 37} with m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20389
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs with m=3 and cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20407
// For n in 5..7 and k in {1, 10, 19, 28, 37}, varies m over 1..3 with a
// single tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20427
// For n in {8, 12} (multiples of nr=4), runs k in {1, 10, 19, 28, 37}
// with m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20444
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs with m=3 and
// cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20462
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, varies m over 1..3 with a
// single tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20482
// Runs k in {1, 10, 19, 28, 37} with m=3, n=4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20498
// With ks=3, runs k in {1, 10, 19, 28, 37} across every (n, m) pair for
// n in 1..4 and m in 1..3, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20519
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs with m=3 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20537
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs with m=3 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20555
// With cm_stride=7, runs k in {1, 10, 19, 28, 37} across every (n, m) pair
// for n in 1..4 and m in 1..3, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20576
// Runs k in {1, 10, 19, 28, 37} with m=3, n=4, ks=3 and a_offset set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20593
// For k in {1, 10, 19, 28, 37}, sweeps zero_index over 0..2 with m=3, n=4,
// ks=3 and a_offset=127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(k_val)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20613
// Runs m=3, n=4, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20627
// Runs m=3, n=4, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20641
// Runs m=3, n=4, k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20655
// Runs k in {1, 10, 19, 28, 37} with m=3, n=4 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20671
// Runs k in {1, 10, 19, 28, 37} with m=3, n=4 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20687
// Runs k in {1, 10, 19, 28, 37} with m=3, n=4 and both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20704 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20705
20706
20707 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the 4x4c2s4 AVX LD128 microkernel with m=4, n=4 and k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20720
// Same configuration as the k=8 case (m=4, n=4), with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20734
// With k fixed at 8, covers every (n, m) pair for n in 1..4 and m in 1..4,
// using a single tester iteration per pair.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20752
// With n=4 and k=8 fixed, varies m over 1..4 (one tester iteration each).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20768
// With m=4 and k=8 fixed, varies n over 1..4 (one tester iteration each).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20784
// Sweeps k over 1..7 (all values below 8) with m=4 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20799
// For each k in 1..7, covers every (n, m) pair for n in 1..4 and m in 1..4,
// using a single tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20819
// Sweeps k over 9..15 with m=4 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20834
// For each k in 9..15, covers every (n, m) pair for n in 1..4 and m in 1..4,
// using a single tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20854
// Sweeps k over the multiples of 8 from 16 through 80 with m=4 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20869
// For each multiple of 8 from 16 through 80 used as k, covers every (n, m)
// pair for n in 1..4 and m in 1..4, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20889
// For n in 5..7 (above nr=4), runs k in {1, 10, 19, 28, 37} with m=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20906
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs with m=4 and cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20924
// For n in 5..7 and k in {1, 10, 19, 28, 37}, varies m over 1..4 with a
// single tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20944
// For n in {8, 12} (multiples of nr=4), runs k in {1, 10, 19, 28, 37}
// with m=4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20961
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs with m=4 and
// cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20979
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, varies m over 1..4 with a
// single tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20999
// Runs k in {1, 10, 19, 28, 37} with m=4, n=4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
21015
// With ks=3, runs k in {1, 10, 19, 28, 37} across every (n, m) pair for
// n in 1..4 and m in 1..4, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
21036
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs with m=4 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
21054
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs with m=4 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
21072
// With cm_stride=7, runs k in {1, 10, 19, 28, 37} across every (n, m) pair
// for n in 1..4 and m in 1..4, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
21093
// Runs k in {1, 10, 19, 28, 37} with m=4, n=4, ks=3 and a_offset set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
21110
// For k in {1, 10, 19, 28, 37}, sweeps zero_index over 0..3 with m=4, n=4,
// ks=3 and a_offset=163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
21130
// Runs m=4, n=4, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
21144
// Runs m=4, n=4, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
21158
// Runs m=4, n=4, k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
21172
// Runs k in {1, 10, 19, 28, 37} with m=4, n=4 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
21188
// Runs k in {1, 10, 19, 28, 37} with m=4, n=4 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
21204
// Runs k in {1, 10, 19, 28, 37} with m=4, n=4 and both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
21221 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21222
21223
21224 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the 1x4c8 SSE2 LD64 microkernel with m=1, n=4 and k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
21237
// Same configuration as the k=8 case (m=1, n=4), with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
21251
// With k fixed at 8, covers n in 1..4; the m loop spans only m=1 because
// mr is 1. One tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
21269
// With n=4 and k=8 fixed, loops m from 1 to 1 (a single value) with one
// tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
21285
// With m=1 and k=8 fixed, varies n over 1..4 (one tester iteration each).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
21301
// Sweeps k over 1..7 (all values below 8) with m=1 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
21316
// For each k in 1..7, sweeps n over 1..4 and m over 1..1 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21336
// m = 1, n = 4; k takes every value in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21351
// For each k in 9..15, sweeps n over 1..4 and m over 1..1 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21371
// m = 1, n = 4; k steps through the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21386
// For each k in {16, 24, ..., 80}, sweeps n over 1..4 and m over 1..1 with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21406
// m = 1; n takes 5..7 and, for each n, k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21423
// Same n (5..7) and k (1..40 step 9) sweep as n_gt_4, with cn_stride(7) set.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21441
// n takes 5..7, k takes 1..40 in steps of 9, m sweeps 1..1; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21461
// m = 1; n takes 8 and 12 and, for each n, k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21478
// Same n (8, 12) and k (1..40 step 9) sweep as n_div_4, with cn_stride(7) set.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21496
// n takes 8 and 12, k takes 1..40 in steps of 9, m sweeps 1..1; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21516
// m = 1, n = 4, ks(3); k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21532
// ks(3) with k in 1..40 (step 9), n in 1..4 and m in 1..1; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21553
// m = 1, ks(3); n takes 5..7 and k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21571
// m = 1, ks(3); n takes 8 and 12 and k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21589
// cm_stride(7) with k in 1..40 (step 9), n in 1..4 and m in 1..1;
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21610
// m = 1, n = 4, ks(3), a_offset(43); k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21627
// ks(3) and a_offset(43) with k in 1..40 (step 9); zero_index is swept over
// 0..0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21647
// m = 1, n = 4, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21661
// m = 1, n = 4, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21675
// m = 1, n = 4, k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21689
// m = 1, n = 4 with a_zero_point(0); k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21705
// m = 1, n = 4 with b_zero_point(0); k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21721
// m = 1, n = 4 with both a_zero_point(0) and b_zero_point(0); k takes 1..40 in
// steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21738 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21739
21740
21741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m = 1, n = 4 with k fixed at 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21754
// m = 1, n = 4, k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
21768
// k is held at 8 while n sweeps 1..4 and m sweeps 1..1; every configuration
// sets iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21786
// n = 4 and k = 8 are fixed; only m is swept (1..1), with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21802
// m = 1 and k = 8 are fixed; only n is swept (1..4), with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21818
// m = 1, n = 4; k takes every value below 8 (1..7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21833
// For each k in 1..7, sweeps n over 1..4 and m over 1..1 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21853
// m = 1, n = 4; k takes every value in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21868
// For each k in 9..15, sweeps n over 1..4 and m over 1..1 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21888
// m = 1, n = 4; k steps through the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
21903
// For each k in {16, 24, ..., 80}, sweeps n over 1..4 and m over 1..1 with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21923
// m = 1; n takes 5..7 and, for each n, k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21940
// Same n (5..7) and k (1..40 step 9) sweep as n_gt_4, with cn_stride(7) set.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21958
// n takes 5..7, k takes 1..40 in steps of 9, m sweeps 1..1; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
21978
// m = 1; n takes 8 and 12 and, for each n, k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
21995
// Same n (8, 12) and k (1..40 step 9) sweep as n_div_4, with cn_stride(7) set.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22013
// n takes 8 and 12, k takes 1..40 in steps of 9, m sweeps 1..1; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22033
// m = 1, n = 4, ks(3); k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22049
// ks(3) with k in 1..40 (step 9), n in 1..4 and m in 1..1; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22070
// m = 1, ks(3); n takes 5..7 and k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22088
// m = 1, ks(3); n takes 8 and 12 and k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22106
// cm_stride(7) with k in 1..40 (step 9), n in 1..4 and m in 1..1;
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22127
// m = 1, n = 4, ks(3), a_offset(43); k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22144
// ks(3) and a_offset(43) with k in 1..40 (step 9); zero_index is swept over
// 0..0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22164
// m = 1, n = 4, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22178
// m = 1, n = 4, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22192
// m = 1, n = 4, k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22206
// m = 1, n = 4 with a_zero_point(0); k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22222
// m = 1, n = 4 with b_zero_point(0); k takes 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22238
// m = 1, n = 4 with both a_zero_point(0) and b_zero_point(0); k takes 1..40 in
// steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22255 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22256
22257
22258 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m = 2, n = 4 with k fixed at 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22271
// m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22285
// k is held at 8 while n sweeps 1..4 and m sweeps 1..2; every configuration
// sets iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22303
// n = 4 and k = 8 are fixed; only m is swept (1..2), with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22319
// m = 2 and k = 8 are fixed; only n is swept (1..4), with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22335
// m = 2, n = 4; k takes every value below 8 (1..7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22350
// For each k in 1..7, sweeps n over 1..4 and m over 1..2 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22370
// Sweeps k over 9..15 with m = 2 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22385
// For each k in 9..15, sweeps n over 1..4 and m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22405
// Sweeps k over the multiples of 8 from 16 to 80 with m = 2 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22420
// For each k in 16, 24, ..., 80, sweeps n over 1..4 and m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22440
// For each n in 5..7, sweeps k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22457
// For each n in 5..7, sweeps k over {1, 10, 19, 28, 37} with m = 2
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22475
// For each n in 5..7 and k in {1, 10, 19, 28, 37}, sweeps m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22495
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22512
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 2
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22530
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, sweeps m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22550
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22566
// For each k in {1, 10, 19, 28, 37}, sweeps n over 1..4 and m over 1..2
// with ks = 3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22587
// For each n in 5..7, sweeps k over {1, 10, 19, 28, 37} with m = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22605
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22623
// For each k in {1, 10, 19, 28, 37}, sweeps n over 1..4 and m over 1..2
// with cm_stride set to 7; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22644
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4, ks = 3
// and a_offset set to 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22661
// For each k in {1, 10, 19, 28, 37}, sweeps zero_index over 0..1
// with m = 2, n = 4, ks = 3 and a_offset = 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(k_dim)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22681
// Single run with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22695
// Single run with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22709
// Single run with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22723
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22739
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22755
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22772 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22773
22774
22775 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr = 2, n = nr = 4 and k = kr = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22788
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
22802
// Sweeps n over 1..4 and m over 1..2 with k = 8;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22820
// Sweeps m over 1..2 with n = 4 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22836
// Sweeps n over 1..4 with m = 2 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22852
// Sweeps k over 1..7 (every value below kr = 8) with m = 2 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22867
// For each k in 1..7, sweeps n over 1..4 and m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22887
// Sweeps k over 9..15 with m = 2 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22902
// For each k in 9..15, sweeps n over 1..4 and m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22922
// Sweeps k over the multiples of 8 from 16 to 80 with m = 2 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
22937
// For each k in 16, 24, ..., 80, sweeps n over 1..4 and m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
22957
// For each n in 5..7, sweeps k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22974
// For each n in 5..7, sweeps k over {1, 10, 19, 28, 37} with m = 2
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
22992
// For each n in 5..7 and k in {1, 10, 19, 28, 37}, sweeps m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23012
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23029
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 2
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23047
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, sweeps m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23067
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23083
// For each k in {1, 10, 19, 28, 37}, sweeps n over 1..4 and m over 1..2
// with ks = 3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23104
// For each n in 5..7, sweeps k over {1, 10, 19, 28, 37} with m = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23122
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23140
// For each k in {1, 10, 19, 28, 37}, sweeps n over 1..4 and m over 1..2
// with cm_stride set to 7; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23161
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4, ks = 3
// and a_offset set to 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23178
// For each k in {1, 10, 19, 28, 37}, sweeps zero_index over 0..1
// with m = 2, n = 4, ks = 3 and a_offset = 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(k_dim)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23198
// Single run with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23212
// Single run with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23226
// Single run with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23240
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23256
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23272
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23289 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23290
23291
23292 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr = 2, n = nr = 4 and k = kr = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23305
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23319
// Sweeps n over 1..4 and m over 1..2 with k = 8;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23337
// Sweeps m over 1..2 with n = 4 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23353
// Sweeps n over 1..4 with m = 2 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23369
// Sweeps k over 1..7 (every value below kr = 8) with m = 2 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23384
// For each k in 1..7, sweeps n over 1..4 and m over 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23404
// Sweeps k over 9..15 with m = 2 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23419
// For each k in 9..15, covers every (m, n) with m in 1..2 and n in 1..4,
// using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23439
// Runs the 2x4 shape for k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23454
// For k = 16, 24, ..., 80, covers every (m, n) with m in 1..2 and n in 1..4,
// using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23474
// Uses n = 5, 6, 7 (above nr = 4) with m = 2, sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
23491
// Same sweep as n_gt_4 (n = 5..7, k = 1, 10, ..., 37) but with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
23509
// For n = 5..7 and k = 1, 10, ..., 37, runs m = 1 and m = 2 with
// iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23529
// Uses n = 8 and n = 12 (multiples of nr = 4) with m = 2, sweeping k over
// 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
23546
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) but with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
23564
// For n = 8, 12 and k = 1, 10, ..., 37, runs m = 1 and m = 2 with
// iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23584
// Runs the 2x4 shape with ks(3), sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23600
// With ks(3) and k = 1, 10, ..., 37, covers every (m, n) with m in 1..2 and
// n in 1..4, using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23621
// Combines n = 5..7 with ks(3) at m = 2, sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
23639
// Combines n = 8, 12 with ks(3) at m = 2, sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
23657
// With cm_stride(7) and k = 1, 10, ..., 37, covers every (m, n) with m in
// 1..2 and n in 1..4, using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23678
// Runs the 2x4 shape with ks(3) and a_offset(83), sweeping k over
// 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).ks(3).a_offset(83);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23695
// With ks(3) and a_offset(83), tries zero_index 0 and 1 for each k in
// 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(depth).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
23715
// Single 2x4 run at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
23729
// Single 2x4 run at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
23743
// Single 2x4 run at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
23757
// Runs the 2x4 shape with a_zero_point(0), sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23773
// Runs the 2x4 shape with b_zero_point(0), sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23789
// Runs the 2x4 shape with both a_zero_point(0) and b_zero_point(0), sweeping
// k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23806 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23807
23808
23809 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 2x4 shape at k = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
23822
// Single 2x4 run at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
23836
// At k = 8, covers every (m, n) with m in 1..2 and n in 1..4, using
// iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
23854
// Holds n = 4 and k = 8 fixed while m takes 1 and 2; each shape runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23870
// Holds m = 2 and k = 8 fixed while n walks 1..4; each shape runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(cols).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23886
// Runs the 2x4 shape for every k in 1..7, i.e. all k below kr = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23901
// For each k in 1..7, covers every (m, n) with m in 1..2 and n in 1..4,
// using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23921
// Runs the 2x4 shape for every k in 9..15 (above kr = 8, below 2 * kr).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23936
// For each k in 9..15, covers every (m, n) with m in 1..2 and n in 1..4,
// using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23956
// Runs the 2x4 shape for k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
23971
// For k = 16, 24, ..., 80, covers every (m, n) with m in 1..2 and n in 1..4,
// using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
23991
// Uses n = 5, 6, 7 (above nr = 4) with m = 2, sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
24008
// Same sweep as n_gt_4 (n = 5..7, k = 1, 10, ..., 37) but with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
24026
// For n = 5..7 and k = 1, 10, ..., 37, runs m = 1 and m = 2 with
// iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
24046
// Uses n = 8 and n = 12 (multiples of nr = 4) with m = 2, sweeping k over
// 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
24063
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) but with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
24081
// For n = 8, 12 and k = 1, 10, ..., 37, runs m = 1 and m = 2 with
// iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
24101
// Runs the 2x4 shape with ks(3), sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24117
// With ks(3) and k = 1, 10, ..., 37, covers every (m, n) with m in 1..2 and
// n in 1..4, using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
24138
// Combines n = 5..7 with ks(3) at m = 2, sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
24156
// Combines n = 8, 12 with ks(3) at m = 2, sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
24174
// With cm_stride(7) and k = 1, 10, ..., 37, covers every (m, n) with m in
// 1..2 and n in 1..4, using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
24195
// Runs the 2x4 shape with ks(3) and a_offset(83), sweeping k over
// 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).ks(3).a_offset(83);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24212
// With ks(3) and a_offset(83), tries zero_index 0 and 1 for each k in
// 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(depth).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
24232
// Single 2x4 run at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
24246
// Single 2x4 run at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
24260
// Single 2x4 run at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
24274
// Runs the 2x4 shape with a_zero_point(0), sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24290
// Runs the 2x4 shape with b_zero_point(0), sweeping k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24306
// Runs the 2x4 shape with both a_zero_point(0) and b_zero_point(0), sweeping
// k over 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth).a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24323 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24324
24325
24326 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 3x4 shape at k = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
24339
// Single 3x4 run at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
24353
// At k = 8, covers every (m, n) with m in 1..3 and n in 1..4, using
// iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
24371
// Holds n = 4 and k = 8 fixed while m walks 1..3; each shape runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24387
// Holds m = 3 and k = 8 fixed while n walks 1..4; each shape runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(cols).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24403
// Runs the 3x4 shape for every k in 1..7, i.e. all k below kr = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24418
// For each k in 1..7, covers every (m, n) with m in 1..3 and n in 1..4,
// using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
24438
// Runs the 3x4 shape for every k in 9..15 (above kr = 8, below 2 * kr).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24453
// For each k in 9..15, covers every (m, n) with m in 1..3 and n in 1..4,
// using iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
24473
// Runs the 3x4 shape for k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
24488
// Sweeps k = 16, 24, ..., 80, n = 1..4, m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24508
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24525
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 3 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24543
// Sweeps n = 5..7, k = 1, 10, ..., 37, m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24563
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24580
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 3 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24598
// Sweeps n = 8, 12, k = 1, 10, ..., 37, m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24618
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24634
// Sweeps k = 1, 10, ..., 37, n = 1..4, m = 1..3 with ks = 3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24655
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 3 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24673
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 3 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24691
// Sweeps k = 1, 10, ..., 37, n = 1..4, m = 1..3 with cm_stride = 7; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24712
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4, ks = 3 and a_offset = 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24729
// Sweeps k = 1, 10, ..., 37 and zero_index = 0..2 with ks = 3 and a_offset = 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t z_val = 0; z_val <= 2; ++z_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_val)
          .ks(3)
          .a_offset(127)
          .zero_index(z_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24749
// Single configuration: m = 3, n = 4, k = 8 with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24763
// Single configuration: m = 3, n = 4, k = 8 with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24777
// Single configuration: m = 3, n = 4, k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24791
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4 and a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24807
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4 and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24823
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4 and both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24840 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24841
24842
24843 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m = 3, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24856
// Single configuration: m = 3, n = 4, k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24870
// Sweeps n = 1..4 and m = 1..3 with k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24888
// Sweeps m = 1..3 with n = 4, k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24904
// Sweeps n = 1..4 with m = 3, k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24920
// Sweeps k = 1..7 with m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24935
// Sweeps k = 1..7, n = 1..4, m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24955
// Sweeps k = 9..15 with m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24970
// Sweeps k = 9..15, n = 1..4, m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24990
// Sweeps k = 16, 24, ..., 80 with m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25005
// Sweeps k = 16, 24, ..., 80, n = 1..4, m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25025
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25042
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 3 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25060
// Sweeps n = 5..7, k = 1, 10, ..., 37, m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25080
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25097
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 3 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25115
// Sweeps n = 8, 12, k = 1, 10, ..., 37, m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25135
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25151
// Sweeps k = 1, 10, ..., 37, n = 1..4, m = 1..3 with ks = 3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25172
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 3 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25190
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 3 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25208
// Sweeps k = 1, 10, ..., 37, n = 1..4, m = 1..3 with cm_stride = 7; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25229
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4, ks = 3 and a_offset = 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25246
// Sweeps k = 1, 10, ..., 37 and zero_index = 0..2 with ks = 3 and a_offset = 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t z_val = 0; z_val <= 2; ++z_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_val)
          .ks(3)
          .a_offset(127)
          .zero_index(z_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25266
// Single configuration: m = 3, n = 4, k = 8 with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
25280
// Single configuration: m = 3, n = 4, k = 8 with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
25294
// Single configuration: m = 3, n = 4, k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
25308
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4 and a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25324
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4 and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25340
// Sweeps k = 1, 10, ..., 37 with m = 3, n = 4 and both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25357 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25358
25359
25360 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m = 1, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
25373
// Single configuration: m = 1, n = 4, k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
25387
// Sweeps n = 1..4 and m = 1 with k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25405
// Sweeps m = 1 (single-trip loop) with n = 4, k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25421
// Sweeps n = 1..4 with m = 1, k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25437
// Sweeps k = 1..7 with m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25452
// Sweeps k = 1..7, n = 1..4, m = 1; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25472
// Sweeps k = 9..15 with m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25487
// Sweeps k = 9..15, n = 1..4, m = 1; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25507
// Sweeps k = 16, 24, ..., 80 with m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25522
// Sweeps k = 16, 24, ..., 80, n = 1..4, m = 1; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25542
// Sweeps n = 5..7 and k = 1, 10, ..., 37 with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25559
// 1x4c8 SSE4.1 kernel: n = 5..7, k = 1..40 in steps of 9, m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25577
// 1x4c8 SSE4.1 kernel: n = 5..7, k = 1..40 in steps of 9, m = 1, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25597
// 1x4c8 SSE4.1 kernel: n = 8 and 12 (multiples of nr = 4), k = 1..40 in steps of 9, m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25614
// 1x4c8 SSE4.1 kernel: n = 8 and 12, k = 1..40 in steps of 9, m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25632
// 1x4c8 SSE4.1 kernel: n = 8 and 12, k = 1..40 in steps of 9, m = 1, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25652
// 1x4c8 SSE4.1 kernel: full 1x4 tile, k = 1..40 in steps of 9, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
25668
// 1x4c8 SSE4.1 kernel: k = 1..40 in steps of 9, n = 1..4, m = 1, with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25689
// 1x4c8 SSE4.1 kernel: n = 5..7, k = 1..40 in steps of 9, m = 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25707
// 1x4c8 SSE4.1 kernel: n = 8 and 12, k = 1..40 in steps of 9, m = 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25725
// 1x4c8 SSE4.1 kernel: k = 1..40 in steps of 9, n = 1..4, m = 1, with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25746
// 1x4c8 SSE4.1 kernel: full 1x4 tile, k = 1..40 in steps of 9, with ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
25763
// 1x4c8 SSE4.1 kernel: k = 1..40 in steps of 9, zero_index = 0 only (loop 0..<1), with ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(kc);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25783
// 1x4c8 SSE4.1 kernel: single 1x4 tile with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
25797
// 1x4c8 SSE4.1 kernel: single 1x4 tile with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
25811
// 1x4c8 SSE4.1 kernel: single 1x4 tile with k = 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
25825
// 1x4c8 SSE4.1 kernel: full 1x4 tile, k = 1..40 in steps of 9, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
25841
// 1x4c8 SSE4.1 kernel: full 1x4 tile, k = 1..40 in steps of 9, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
25857
// 1x4c8 SSE4.1 kernel: full 1x4 tile, k = 1..40 in steps of 9, with a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
25874 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25875
25876
25877 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c8 SSE4.1 kernel: single full 2x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
25890
// 2x4c8 SSE4.1 kernel: single full 2x4 tile with k = 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
25904
// 2x4c8 SSE4.1 kernel: k = 8, every n = 1..4 by m = 1..2 combination, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25922
// 2x4c8 SSE4.1 kernel: k = 8, n = 4, m = 1..2, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
25938
// 2x4c8 SSE4.1 kernel: k = 8, m = 2, n = 1..4, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
25954
// 2x4c8 SSE4.1 kernel: full 2x4 tile with k = 1..7 (below kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
25969
// 2x4c8 SSE4.1 kernel: k = 1..7, n = 1..4, m = 1..2, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25989
// 2x4c8 SSE4.1 kernel: full 2x4 tile with k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26004
// 2x4c8 SSE4.1 kernel: k = 9..15, n = 1..4, m = 1..2, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26024
// 2x4c8 SSE4.1 kernel: full 2x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26039
// 2x4c8 SSE4.1 kernel: k = 16..80 in steps of 8, n = 1..4, m = 1..2, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26059
// 2x4c8 SSE4.1 kernel: n = 5..7 (above nr = 4), k = 1..40 in steps of 9, m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26076
// 2x4c8 SSE4.1 kernel: n = 5..7, k = 1..40 in steps of 9, m = 2, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26094
// 2x4c8 SSE4.1 kernel: n = 5..7, k = 1..40 in steps of 9, m = 1..2, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26114
// 2x4c8 SSE4.1 kernel: n = 8 and 12 (multiples of nr = 4), k = 1..40 in steps of 9, m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26131
// 2x4c8 SSE4.1 kernel: n = 8 and 12, k = 1..40 in steps of 9, m = 2, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26149
// 2x4c8 SSE4.1 kernel: n = 8 and 12, k = 1..40 in steps of 9, m = 1..2, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26169
// 2x4c8 SSE4.1 kernel: full 2x4 tile, k = 1..40 in steps of 9, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26185
// 2x4c8 SSE4.1 kernel: k = 1..40 in steps of 9, n = 1..4, m = 1..2, with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26206
// 2x4c8 SSE4.1 kernel: n = 5..7, k = 1..40 in steps of 9, m = 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26224
// 2x4c8 SSE4.1 kernel: n = 8 and 12, k = 1..40 in steps of 9, m = 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26242
// 2x4c8 SSE4.1 kernel: k = 1..40 in steps of 9, n = 1..4, m = 1..2, with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26263
// 2x4c8 SSE4.1 kernel: full 2x4 tile, k = 1..40 in steps of 9, with ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26280
// 2x4c8 SSE4.1 kernel: k = 1..40 in steps of 9, zero_index = 0 and 1, with ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26300
// 2x4c8 SSE4.1 kernel: single 2x4 tile with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26314
// 2x4c8 SSE4.1 kernel: single 2x4 tile with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26328
// 2x4c8 SSE4.1 kernel: single 2x4 tile with k = 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26342
// 2x4c8 SSE4.1 kernel: full 2x4 tile, k = 1..40 in steps of 9, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26358
// 2x4c8 SSE4.1 kernel: full 2x4 tile, k = 1..40 in steps of 9, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26374
// 2x4c8 SSE4.1 kernel: full 2x4 tile, k = 1..40 in steps of 9, with a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26391 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26392
26393
26394 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 SSE2 kernel: single full 3x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26407
// 3x4c8 SSE2 kernel: single full 3x4 tile with k = 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26421
// 3x4c8 SSE2 kernel: k = 8, every n = 1..4 by m = 1..3 combination, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26439
// 3x4c8 SSE2 kernel: k = 8, n = 4, m = 1..3, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26455
// 3x4c8 SSE2 kernel: k = 8, m = 3, n = 1..4, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26471
// 3x4c8 SSE2 kernel: full 3x4 tile with k = 1..7 (below kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26486
// 3x4c8 SSE2 kernel: k = 1..7, n = 1..4, m = 1..3, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26506
// 3x4c8 SSE2 kernel: full 3x4 tile with k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26521
// 3x4c8 SSE2 kernel: k = 9..15, n = 1..4, m = 1..3, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26541
// 3x4c8 SSE2 kernel: full 3x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26556
// 3x4c8 SSE2 kernel: k = 16..80 in steps of 8, n = 1..4, m = 1..3, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26576
// 3x4c8 SSE2 kernel: n = 5..7 (above nr = 4), k = 1..40 in steps of 9, m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26593
// 3x4c8 SSE2 kernel: n = 5..7, k = 1..40 in steps of 9, m = 3, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26611
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m in [1, 3]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26631
// n in {8, 12} (multiples of nr = 4) with m = 3; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26648
// n in {8, 12} with m = 3 and cn_stride = 7; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26666
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 3]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26686
// Full 3x4 tile with ks = 3; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26702
// ks = 3 over k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 3];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26723
// n in {5, 6, 7} with m = 3 and ks = 3; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26741
// n in {8, 12} with m = 3 and ks = 3; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26759
// cm_stride = 7 over k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 3];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26780
// Full 3x4 tile with ks = 3 and a_offset = 127; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26797
// Full 3x4 tile with ks = 3 and a_offset = 127; zero_index takes each value
// in {0, 1, 2} for every k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26817
// Full 3x4 tile at k = 8 with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26831
// Full 3x4 tile at k = 8 with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26845
// Full 3x4 tile at k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26859
// Full 3x4 tile with a_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26875
// Full 3x4 tile with b_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26891
// Full 3x4 tile with both a_zero_point and b_zero_point set to 0;
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26908 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26909
26910
26911 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26924
// Full 1x4 tile at k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26938
// k = 8 over n in [1, 4] and m in [1, 1]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26956
// k = 8, n = 4, m in [1, 1]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26972
// k = 8, m = 1, n in [1, 4]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26988
// Full 1x4 tile for every k in [1, 7] (below kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27003
// k in [1, 7], n in [1, 4], m in [1, 1]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27023
// Full 1x4 tile for every k in [9, 15] (above kr = 8, below 2 * kr).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27038
// k in [9, 15], n in [1, 4], m in [1, 1]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27058
// Full 1x4 tile; k takes every multiple of 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27073
// For each k in {16, 24, ..., 80}: every n in [1, 4] and m in [1, 1],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27093
// n in {5, 6, 7} (larger than nr = 4) with m = 1; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27110
// n in {5, 6, 7} with m = 1 and cn_stride = 7; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27128
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m in [1, 1]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27148
// n in {8, 12} (multiples of nr = 4) with m = 1; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27165
// n in {8, 12} with m = 1 and cn_stride = 7; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27183
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 1]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27203
// Full 1x4 tile with ks = 3; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27219
// ks = 3 over k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27240
// n in {5, 6, 7} with m = 1 and ks = 3; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27258
// n in {8, 12} with m = 1 and ks = 3; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27276
// cm_stride = 7 over k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27297
// Full 1x4 tile with ks = 3 and a_offset = 43; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27314
// Full 1x4 tile with ks = 3 and a_offset = 43; zero_index takes the single
// value 0 for every k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27334
// Full 1x4 tile at k = 8 with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27348
// Full 1x4 tile at k = 8 with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27362
// Full 1x4 tile at k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27376
// Full 1x4 tile with a_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27392
// Full 1x4 tile with b_zero_point = 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27408
// Full 1x4 tile with both a_zero_point and b_zero_point set to 0;
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27425 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27426
27427
27428 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile at k = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27441
// Full 2x4 tile at k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27455
// k = 8 over n in [1, 4] and m in [1, 2]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27473
// k = 8, n = 4, m in [1, 2]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27489
// k = 8, m = 2, n in [1, 4]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27505
// Full 2x4 tile for every k in [1, 7] (below kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27520
// k in [1, 7], n in [1, 4], m in [1, 2]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27540
// Full 2x4 tile for every k in [9, 15] (above kr = 8, below 2 * kr).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27555
// k in [9, 15], n in [1, 4], m in [1, 2]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27575
// Full 2x4 tile; k takes every multiple of 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27590
// For each k in {16, 24, ..., 80}: every n in [1, 4] and m in [1, 2],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27610
// n in {5, 6, 7} (larger than nr = 4) with m = 2; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27627
// n in {5, 6, 7} with m = 2 and cn_stride = 7; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27645
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m in [1, 2]; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27665
// Runs the 2x4c8 AVX LD128 kernel with m = 2 for n = 8 and 12, and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27682
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, 19, 28, 37; m = 2), with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27700
// Runs the 2x4c8 AVX LD128 kernel for n = 8, 12, k = 1, 10, 19, 28, 37 and
// m in 1..2, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27720
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel with ks = 3 for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27736
// Runs the 2x4c8 AVX LD128 kernel with ks = 3 for k = 1, 10, 19, 28, 37 over
// every (m, n) pair with m in 1..2 and n in 1..4, one tester iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27757
// Runs the 2x4c8 AVX LD128 kernel with ks = 3 and m = 2 for n = 5, 6, 7 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27775
// Runs the 2x4c8 AVX LD128 kernel with ks = 3 and m = 2 for n = 8, 12 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27793
// Runs the 2x4c8 AVX LD128 kernel with cm_stride = 7 for k = 1, 10, 19, 28, 37
// over every (m, n) pair with m in 1..2 and n in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27814
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel with ks = 3 and
// a_offset = 83 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27831
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel with ks = 3 and
// a_offset = 83, trying zero_index = 0 and 1 for each k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27851
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel at k = 8 with
// qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27865
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel at k = 8 with
// qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27879
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel at k = 8 with
// cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27893
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel with a_zero_point = 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27909
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel with b_zero_point = 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27925
// Runs the full 2x4 tile of the 2x4c8 AVX LD128 kernel with both
// a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27942 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27943
27944
27945 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27958
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel at k = 8 with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27972
// Runs the 3x4c8 XOP LD128 kernel at k = 8 over every (m, n) pair with
// m in 1..3 and n in 1..4, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27990
// Runs the 3x4c8 XOP LD128 kernel at k = 8 and n = 4 for m in 1..3,
// one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28006
// Runs the 3x4c8 XOP LD128 kernel at k = 8 and m = 3 for n in 1..4,
// one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28022
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel for k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28037
// Runs the 3x4c8 XOP LD128 kernel for k in 1..7 over every (m, n) pair with
// m in 1..3 and n in 1..4, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28057
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel for k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28072
// Runs the 3x4c8 XOP LD128 kernel for k in 9..15 over every (m, n) pair with
// m in 1..3 and n in 1..4, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28092
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel for
// k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28107
// Runs the 3x4c8 XOP LD128 kernel for k = 16, 24, ..., 80 over every
// (m, n) pair with m in 1..3 and n in 1..4, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28127
// Runs the 3x4c8 XOP LD128 kernel with m = 3 for n = 5, 6, 7 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28144
// Same sweep as n_gt_4 (n = 5, 6, 7; k = 1, 10, 19, 28, 37; m = 3), with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28162
// Runs the 3x4c8 XOP LD128 kernel for n = 5, 6, 7, k = 1, 10, 19, 28, 37 and
// m in 1..3, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28182
// Runs the 3x4c8 XOP LD128 kernel with m = 3 for n = 8 and 12, and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28199
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, 19, 28, 37; m = 3), with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28217
// Runs the 3x4c8 XOP LD128 kernel for n = 8, 12, k = 1, 10, 19, 28, 37 and
// m in 1..3, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28237
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel with ks = 3 for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28253
// Runs the 3x4c8 XOP LD128 kernel with ks = 3 for k = 1, 10, 19, 28, 37 over
// every (m, n) pair with m in 1..3 and n in 1..4, one tester iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28274
// Runs the 3x4c8 XOP LD128 kernel with ks = 3 and m = 3 for n = 5, 6, 7 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28292
// Runs the 3x4c8 XOP LD128 kernel with ks = 3 and m = 3 for n = 8, 12 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28310
// Runs the 3x4c8 XOP LD128 kernel with cm_stride = 7 for k = 1, 10, 19, 28, 37
// over every (m, n) pair with m in 1..3 and n in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28331
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel with ks = 3 and
// a_offset = 127 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28348
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel with ks = 3 and
// a_offset = 127, trying zero_index = 0, 1, 2 for each k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28368
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel at k = 8 with
// qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
28382
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel at k = 8 with
// qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
28396
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel at k = 8 with
// cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
28410
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel with a_zero_point = 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28426
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel with b_zero_point = 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28442
// Runs the full 3x4 tile of the 3x4c8 XOP LD128 kernel with both
// a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
28459 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28460
28461
28462 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the full 1x8 tile of the 1x8c8 AVX2 kernel at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
28475
// Runs the full 1x8 tile of the 1x8c8 AVX2 kernel at k = 8 with
// cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
28489
// Runs the 1x8c8 AVX2 kernel at k = 8 over every (m, n) pair with m = 1 and
// n in 1..8, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28507
// Runs the 1x8c8 AVX2 kernel at k = 8 and n = 8 for m = 1 (the loop covers a
// single value because mr is 1), with one tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
28523
// Runs the 1x8c8 AVX2 kernel at k = 8 and m = 1 for n in 1..8,
// one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
28539
// Runs the full 1x8 tile of the 1x8c8 AVX2 kernel for k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
28554
// Runs the 1x8c8 AVX2 kernel for k in 1..7 over every (m, n) pair with m = 1
// and n in 1..8, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28574
// Runs the full 1x8 tile of the 1x8c8 AVX2 kernel for k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
28589
// Runs the 1x8c8 AVX2 kernel for k in 9..15 over every (m, n) pair with m = 1
// and n in 1..8, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28609
// Runs the full 1x8 tile of the 1x8c8 AVX2 kernel for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
28624
// Runs the 1x8c8 AVX2 kernel for k = 16, 24, ..., 80 over every (m, n) pair
// with m = 1 and n in 1..8, one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28644
// Runs the 1x8c8 AVX2 kernel with m = 1 for n in 9..15 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28661
// Same sweep as n_gt_8 (n in 9..15; k = 1, 10, 19, 28, 37; m = 1), with
// cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28679
// Runs the 1x8c8 AVX2 kernel for n in 9..15, k = 1, 10, 19, 28, 37 and m = 1,
// one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
28699
// Runs the 1x8c8 AVX2 kernel with m = 1 for n = 16 and 24, and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28716
// Same sweep as n_div_8 (n = 16, 24; k = 1, 10, 19, 28, 37; m = 1), with
// cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
28734
// 1x8c8 AVX2 kernel: n takes 16 and 24, k takes 1, 10, 19, 28, 37, and m
// covers 1..1; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28754
// 1x8c8 AVX2 kernel: full 1x8 tile with ks(3); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
  }
}
28770
// 1x8c8 AVX2 kernel: k takes 1, 10, 19, 28, 37, n sweeps 1..8, m covers 1..1;
// every configuration uses ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28791
// 1x8c8 AVX2 kernel: n sweeps 9..15 and k takes 1, 10, 19, 28, 37, with m
// fixed at 1 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28809
// 1x8c8 AVX2 kernel: n takes 16 and 24, k takes 1, 10, 19, 28, 37, with m
// fixed at 1 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28827
// 1x8c8 AVX2 kernel: k takes 1, 10, 19, 28, 37, n sweeps 1..8, m covers 1..1;
// every configuration uses cm_stride(11) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28848
// 1x8c8 AVX2 kernel: full 1x8 tile with ks(3) and a_offset(43); k takes
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
  }
}
28865
// 1x8c8 AVX2 kernel: full 1x8 tile with ks(3) and a_offset(43); k takes
// 1, 10, 19, 28, 37 and zero_index covers 0 only.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(8).k(depth)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_row)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28885
// 1x8c8 AVX2 kernel: single 1x8 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
}
28899
// 1x8c8 AVX2 kernel: single 1x8 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
}
28913
// 1x8c8 AVX2 kernel: single 1x8 tile at k = 8 with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
}
28927
// 1x8c8 AVX2 kernel: full 1x8 tile with a_zero_point(0); k takes
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(depth)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
  }
}
28943
// 1x8c8 AVX2 kernel: full 1x8 tile with b_zero_point(0); k takes
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(depth)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
  }
}
28959
// 1x8c8 AVX2 kernel: full 1x8 tile with both a_zero_point(0) and
// b_zero_point(0); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
  }
}
28976 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28977
28978
28979 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x16c8 AVX512SKX kernel: single full 1x16 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
}
28992
// 1x16c8 AVX512SKX kernel: single full 1x16 tile at k = 8 with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
}
29006
// 1x16c8 AVX512SKX kernel: k = 8, n sweeps 1..16, m covers 1..1; each
// configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29024
// 1x16c8 AVX512SKX kernel: k = 8 and n = 16 while m covers 1..1; runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(rows).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29040
// 1x16c8 AVX512SKX kernel: k = 8 and m = 1 while n sweeps 1..16; runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29056
// 1x16c8 AVX512SKX kernel: full 1x16 tile while k sweeps 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(depth)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29071
// 1x16c8 AVX512SKX kernel: k sweeps 1..7, n sweeps 1..16, m covers 1..1; each
// configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29091
// 1x16c8 AVX512SKX kernel: full 1x16 tile while k sweeps 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(depth)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29106
// 1x16c8 AVX512SKX kernel: k sweeps 9..15, n sweeps 1..16, m covers 1..1;
// each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29126
// 1x16c8 AVX512SKX kernel: full 1x16 tile while k steps from 16 to 80 by 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(depth)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29141
// 1x16c8 AVX512SKX kernel: k steps from 16 to 80 by 8, n sweeps 1..16, m
// covers 1..1; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29161
// 1x16c8 AVX512SKX kernel: n sweeps 17..31 and k takes 1, 10, 19, 28, 37,
// with m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29178
// 1x16c8 AVX512SKX kernel: n sweeps 17..31 and k takes 1, 10, 19, 28, 37,
// with m fixed at 1 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29196
// 1x16c8 AVX512SKX kernel: n sweeps 17..31, k takes 1, 10, 19, 28, 37, and m
// covers 1..1; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29216
// 1x16c8 AVX512SKX kernel: n takes 32 and 48, k takes 1, 10, 19, 28, 37, m
// is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29233
// 1x16c8 AVX512SKX kernel: n takes 32 and 48, k takes 1, 10, 19, 28, 37, m
// is 1, and cn_stride is set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29251
// 1x16c8 AVX512SKX kernel: n takes 32 and 48, k takes 1, 10, 19, 28, 37, and
// m covers 1..1; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29271
// 1x16c8 AVX512SKX kernel: full 1x16 tile with ks(3); k takes
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(depth)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29287
// 1x16c8 AVX512SKX kernel: k takes 1, 10, 19, 28, 37, n sweeps 1..16, m
// covers 1..1; every configuration uses ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29308
// 1x16c8 AVX512SKX kernel: n sweeps 17..31 and k takes 1, 10, 19, 28, 37,
// with m fixed at 1 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29326
// 1x16c8 AVX512SKX kernel: n takes 32 and 48, k takes 1, 10, 19, 28, 37,
// with m fixed at 1 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29344
// 1x16c8 AVX512SKX kernel: k takes 1, 10, 19, 28, 37, n sweeps 1..16, m
// covers 1..1; every configuration uses cm_stride(19) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29365
// 1x16c8 AVX512SKX kernel: full 1x16 tile with ks(3) and a_offset(43); k
// takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29382
// 1x16c8 AVX512SKX kernel: full 1x16 tile with ks(3) and a_offset(43); k
// takes 1, 10, 19, 28, 37 and zero_index covers 0 only.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(1).n(16).k(depth)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_row)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29402
// 1x16c8 AVX512SKX kernel: single 1x16 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
}
29416
// 1x16c8 AVX512SKX kernel: single 1x16 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
}
29430
// 1x16c8 AVX512SKX kernel: single 1x16 tile at k = 8 with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
}
29444
// 1x16c8 AVX512SKX kernel: full 1x16 tile with a_zero_point(0); k takes
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(depth)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29460
// 1x16c8 AVX512SKX kernel: full 1x16 tile with b_zero_point(0); k takes
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(depth)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29476
// 1x16c8 AVX512SKX kernel: full 1x16 tile with both a_zero_point(0) and
// b_zero_point(0); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29493 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29494
29495
29496 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x16c8 AVX512SKX kernel: single full 2x16 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
}
29509
// 2x16c8 AVX512SKX kernel: single full 2x16 tile at k = 8 with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
}
29523
// 2x16c8 AVX512SKX kernel: k = 8, n sweeps 1..16, m sweeps 1..2; each
// configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29541
// 2x16c8 AVX512SKX kernel: k = 8 and n = 16 while m sweeps 1..2; runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(rows).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29557
// 2x16c8 AVX512SKX kernel: k = 8 and m = 2 while n sweeps 1..16; runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29573
// 2x16c8 AVX512SKX kernel: full 2x16 tile while k sweeps 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(depth)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29588
// 2x16c8 AVX512SKX kernel: k sweeps 1..7, n sweeps 1..16, m sweeps 1..2; each
// configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29608
// 2x16c8 AVX512SKX kernel: full 2x16 tile while k sweeps 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(depth)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29623
// 2x16c8 AVX512SKX kernel: k sweeps 9..15, n sweeps 1..16, m sweeps 1..2;
// each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29643
// 2x16c8 AVX512SKX kernel: full 2x16 tile while k steps from 16 to 80 by 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(depth)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
  }
}
29658
// 2x16c8 AVX512SKX kernel: k steps from 16 to 80 by 8, n sweeps 1..16, m
// sweeps 1..2; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29678
// 2x16c8 AVX512SKX kernel: n sweeps 17..31 and k takes 1, 10, 19, 28, 37,
// with m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29695
// 2x16c8 AVX512SKX kernel: n sweeps 17..31 and k takes 1, 10, 19, 28, 37,
// with m fixed at 2 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29713
// 2x16c8 AVX512SKX kernel: n sweeps 17..31, k takes 1, 10, 19, 28, 37, and m
// sweeps 1..2; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qu8_conv_minmax_fp32_avx512_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
29733
// 2x16c8 AVX512SKX kernel: n takes 32 and 48, k takes 1, 10, 19, 28, 37, m
// is 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29750
// 2x16c8 AVX512SKX kernel: n takes 32 and 48, k takes 1, 10, 19, 28, 37, m
// is 2, and cn_stride is set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
29768
// 2x16c8 AVX512SKX kernel: n in {32, 48}, k in {1, 10, 19, 28, 37} and
// m in 1..2, each combination run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 32; n_arg <= 48; n_arg += 16) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29788
// 2x16c8 AVX512SKX kernel: full 2x16 problem with ks(3), k swept over
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_arg).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29804
// 2x16c8 AVX512SKX kernel: ks(3) with k in {1, 10, 19, 28, 37}, n in 1..16
// and m in 1..2; every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29825
// 2x16c8 AVX512SKX kernel: ks(3) with n in 17..31 and k in
// {1, 10, 19, 28, 37}; m is fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 17; n_arg < 32; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(n_arg).k(k_arg).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29843
// 2x16c8 AVX512SKX kernel: ks(3) with n in {32, 48} and k in
// {1, 10, 19, 28, 37}; m is fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 32; n_arg <= 48; n_arg += 16) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(n_arg).k(k_arg).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29861
// 2x16c8 AVX512SKX kernel: cm_stride(19) with k in {1, 10, 19, 28, 37},
// n in 1..16 and m in 1..2; every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29882
// 2x16c8 AVX512SKX kernel: full 2x16 problem with ks(3) and a_offset(83),
// k swept over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_arg).ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29899
// 2x16c8 AVX512SKX kernel: ks(3) and a_offset(83) with k in
// {1, 10, 19, 28, 37}; zero_index is swept over 0..1.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(16).k(k_arg).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29919
// 2x16c8 AVX512SKX kernel: single 2x16 problem with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29933
// 2x16c8 AVX512SKX kernel: single 2x16 problem with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29947
// 2x16c8 AVX512SKX kernel: single 2x16 problem with k = 8 and cm_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29961
// 2x16c8 AVX512SKX kernel: full 2x16 problem with a_zero_point(0),
// k swept over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_arg).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29977
// 2x16c8 AVX512SKX kernel: full 2x16 problem with b_zero_point(0),
// k swept over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_arg).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29993
// 2x16c8 AVX512SKX kernel: full 2x16 problem with both a_zero_point(0) and
// b_zero_point(0), k swept over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(k_arg).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30010 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30011
30012
30013 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x16c8 AVX512SKX kernel: single 4x16 problem with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
30026
// 4x16c8 AVX512SKX kernel: single 4x16 problem with k = 8 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
30040
// 4x16c8 AVX512SKX kernel: k = 8 with n in 1..16 and m in 1..4; every
// combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
    for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(m_arg).n(n_arg).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30058
// 4x16c8 AVX512SKX kernel: k = 8 and n = 16 with m swept over 1..4, each run
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(m_arg).n(16).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30074
// 4x16c8 AVX512SKX kernel: k = 8 and m = 4 with n swept over 1..16, each run
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(n_arg).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30090
// 4x16c8 AVX512SKX kernel: full 4x16 problem with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg < 8; ++k_arg) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_arg);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30105
// 4x16c8 AVX512SKX kernel: k in 1..7, n in 1..16 and m in 1..4; every
// combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg < 8; ++k_arg) {
    for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30125
// 4x16c8 AVX512SKX kernel: full 4x16 problem with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 9; k_arg < 16; ++k_arg) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_arg);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30140
// 4x16c8 AVX512SKX kernel: k in 9..15, n in 1..16 and m in 1..4; every
// combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 9; k_arg < 16; ++k_arg) {
    for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30160
// 4x16c8 AVX512SKX kernel: full 4x16 problem with k taking the multiples of 8
// from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 16; k_arg <= 80; k_arg += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_arg);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30175
// 4x16c8 AVX512SKX kernel: k over the multiples of 8 from 16 through 80,
// n in 1..16 and m in 1..4; every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 16; k_arg <= 80; k_arg += 8) {
    for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30195
// 4x16c8 AVX512SKX kernel: n in 17..31 and k in {1, 10, 19, 28, 37};
// m is fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 17; n_arg < 32; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n_arg).k(k_arg);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30212
// 4x16c8 AVX512SKX kernel: n in 17..31 and k in {1, 10, 19, 28, 37} with
// cn_stride(19); m is fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 17; n_arg < 32; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n_arg).k(k_arg).cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30230
// 4x16c8 AVX512SKX kernel: n in 17..31, k in {1, 10, 19, 28, 37} and
// m in 1..4; every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 17; n_arg < 32; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30250
// 4x16c8 AVX512SKX kernel: n in {32, 48} and k in {1, 10, 19, 28, 37};
// m is fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 32; n_arg <= 48; n_arg += 16) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n_arg).k(k_arg);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30267
// 4x16c8 AVX512SKX kernel: n in {32, 48} and k in {1, 10, 19, 28, 37} with
// cn_stride(19); m is fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 32; n_arg <= 48; n_arg += 16) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n_arg).k(k_arg).cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30285
// 4x16c8 AVX512SKX kernel: n in {32, 48}, k in {1, 10, 19, 28, 37} and
// m in 1..4; every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 32; n_arg <= 48; n_arg += 16) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30305
// 4x16c8 AVX512SKX kernel: full 4x16 problem with ks(3), k swept over
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_arg).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30321
// 4x16c8 AVX512SKX kernel: ks(3) with k in {1, 10, 19, 28, 37}, n in 1..16
// and m in 1..4; every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30342
// 4x16c8 AVX512SKX kernel: ks(3) with n in 17..31 and k in
// {1, 10, 19, 28, 37}; m is fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 17; n_arg < 32; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n_arg).k(k_arg).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30360
// 4x16c8 AVX512SKX kernel: ks(3) with n in {32, 48} and k in
// {1, 10, 19, 28, 37}; m is fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_arg = 32; n_arg <= 48; n_arg += 16) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n_arg).k(k_arg).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30378
// 4x16c8 AVX512SKX kernel: cm_stride(19) with k in {1, 10, 19, 28, 37},
// n in 1..16 and m in 1..4; every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    for (uint32_t n_arg = 1; n_arg <= 16; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 4; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30399
// 4x16c8 AVX512SKX kernel: full 4x16 problem with ks(3) and a_offset(163),
// k swept over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_arg).ks(3).a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30416
// 4x16c8 AVX512SKX kernel: ks(3) and a_offset(163) with k in
// {1, 10, 19, 28, 37}; zero_index is swept over 0..3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(16).k(k_arg).ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30436
// 4x16c8 AVX512SKX kernel: single 4x16 problem with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
30450
// 4x16c8 AVX512SKX kernel: single 4x16 problem with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
30464
// 4x16c8 AVX512SKX kernel: single 4x16 problem with k = 8 and cm_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
30478
// 4x16c8 AVX512SKX kernel: full 4x16 problem with a_zero_point(0),
// k swept over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_arg).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30494
// 4x16c8 AVX512SKX kernel: full 4x16 problem with b_zero_point(0),
// k swept over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_arg).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30510
// 4x16c8 AVX512SKX kernel: full 4x16 problem with both a_zero_point(0) and
// b_zero_point(0), k swept over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_arg).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30527 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30528
30529
30530 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: single 1x4 problem with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30542
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: single 1x4 problem with k = 8 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30555
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: k = 8 with n in 1..4; the m loop
// covers only m = 1. Every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
    for (uint32_t m_arg = 1; m_arg <= 1; ++m_arg) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_arg).n(n_arg).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30572
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: k = 8 and n = 4; the m loop covers
// only m = 1 and runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_arg = 1; m_arg <= 1; ++m_arg) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_arg).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30587
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: k = 8 and m = 1 with n swept over
// 1..4, each run using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_arg).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30602
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: full 1x4 problem with k swept over
// 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_arg = 1; k_arg < 8; ++k_arg) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_arg);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30616
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: k in 1..7 and n in 1..4; the m loop
// covers only m = 1. Every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_arg = 1; k_arg < 8; ++k_arg) {
    for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 1; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30635
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: full 1x4 problem with k swept over
// 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_arg = 9; k_arg < 16; ++k_arg) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_arg);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30649
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: k in 9..15 and n in 1..4; the m loop
// covers only m = 1. Every combination uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_arg = 9; k_arg < 16; ++k_arg) {
    for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 1; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30668
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: full 1x4 problem with k taking the
// multiples of 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_arg = 16; k_arg <= 80; k_arg += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_arg);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30682
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: k over the multiples of 8 from 16
// through 80 and n in 1..4; the m loop covers only m = 1. Every combination
// uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_arg = 16; k_arg <= 80; k_arg += 8) {
    for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 1; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30701
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: n in 5..7 and k in
// {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_arg = 5; n_arg < 8; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_arg).k(k_arg);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30717
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: n in 5..7 and k in
// {1, 10, 19, 28, 37} with cn_stride(7); m is fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_arg = 5; n_arg < 8; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_arg).k(k_arg).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30734
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: n in 5..7 and k in
// {1, 10, 19, 28, 37}; the m loop covers only m = 1. Every combination uses
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_arg = 5; n_arg < 8; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      for (uint32_t m_arg = 1; m_arg <= 1; ++m_arg) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30753
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: n in {8, 12} and k in
// {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_arg = 8; n_arg <= 12; n_arg += 4) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_arg).k(k_arg);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30769
// 1x4c2 WAsm SIMD dot16x2 ld64 kernel: n in {8, 12} and k in
// {1, 10, 19, 28, 37} with cn_stride(7); m is fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_arg = 8; n_arg <= 12; n_arg += 4) {
    for (size_t k_arg = 1; k_arg <= 40; k_arg += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_arg).k(k_arg).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30786
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37}; the m loop covers only
// m = 1. Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30805
// Full 1x4 tile with ks(3) set on the tester, swept over
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kk)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30820
// ks(3) cases over k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1 (the m loop
// has a single iteration). Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30840
// ks(3) cases with n in 5..7 and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30857
// ks(3) cases with n in {8, 12} and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30874
// cm_stride(7) cases over k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1 (the
// m loop has a single iteration). Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30894
// Full 1x4 tile with ks(3) and a_offset(43), swept over
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kk)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30910
// Full 1x4 tile with ks(3), a_offset(43) and zero_index set from the inner
// loop (only index 0 here), swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kk)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30929
// Single case (m = 1, n = 4, k = 8) with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30942
// Single case (m = 1, n = 4, k = 8) with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30955
// Single case (m = 1, n = 4, k = 8) with cm_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30968
// Full 1x4 tile with a_zero_point(0), swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kk)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30983
// Full 1x4 tile with b_zero_point(0), swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kk)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30998
// Full 1x4 tile with both a_zero_point(0) and b_zero_point(0), swept over
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kk)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31014 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31015
31016
31017 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: full 1x4 tile (m = 1, n = 4) at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
31029
// Single case (m = 1, n = 4, k = 8) with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
31042
// k = 8 with n in 1..4 and m = 1 (the m loop has a single iteration).
// Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 1; ++mm) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31059
// k = 8, n = 4, with m taken from a loop that covers only m = 1.
// Runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(mm).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31074
// k = 8, m = 1, with n swept over 1..4. Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31089
// Full 1x4 tile with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kk)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31103
// k in 1..7, n in 1..4 and m = 1 (the m loop has a single iteration).
// Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31122
// Full 1x4 tile with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kk)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31136
// k in 9..15, n in 1..4 and m = 1 (the m loop has a single iteration).
// Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31155
// Full 1x4 tile with k swept over the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kk)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31169
// k over the multiples of 8 from 16 to 80, n in 1..4 and m = 1 (the m loop
// has a single iteration). Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31188
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nn).k(kk)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31204
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, 19, 28, 37}, m = 1), but
// with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31221
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37}; the m loop covers only
// m = 1. Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31240
// Sweeps n over {8, 12} (multiples of the nr(4) setting) and k over
// {1, 10, 19, 28, 37} with m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nn).k(kk)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31256
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1),
// but with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31273
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37}; the m loop covers only
// m = 1. Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31292
// Full 1x4 tile with ks(3) set on the tester, swept over
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kk)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31307
// ks(3) cases over k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1 (the m loop
// has a single iteration). Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31327
// ks(3) cases with n in 5..7 and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31344
// ks(3) cases with n in {8, 12} and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31361
// cm_stride(7) cases over k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1 (the
// m loop has a single iteration). Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31381
// Full 1x4 tile with ks(3) and a_offset(43), swept over
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kk)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31397
// Full 1x4 tile with ks(3), a_offset(43) and zero_index set from the inner
// loop (only index 0 here), swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kk)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31416
// Single case (m = 1, n = 4, k = 8) with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
31429
// Single case (m = 1, n = 4, k = 8) with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
31442
// Single case (m = 1, n = 4, k = 8) with cm_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
31455
// Full 1x4 tile with a_zero_point(0), swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kk)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31470
// Full 1x4 tile with b_zero_point(0), swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kk)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31485
// Full 1x4 tile with both a_zero_point(0) and b_zero_point(0), swept over
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kk)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31501 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31502
31503
31504 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: full 2x4 tile (m = 2, n = 4) at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
31516
// Single case (m = 2, n = 4, k = 8) with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
31529
// k = 8 with every (m, n) pair for m in 1..2 and n in 1..4.
// Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31546
// k = 8, n = 4, with m swept over 1..2. Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(mm).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31561
// k = 8, m = 2, with n swept over 1..4. Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31576
// Full 2x4 tile with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31590
// k in 1..7 combined with every (m, n) pair for m in 1..2 and n in 1..4.
// Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31609
// Full 2x4 tile with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31623
// k in 9..15 combined with every (m, n) pair for m in 1..2 and n in 1..4.
// Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31642
// Full 2x4 tile with k swept over the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
31656
// k over the multiples of 8 from 16 to 80, combined with every (m, n) pair
// for m in 1..2 and n in 1..4. Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31675
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31691
// Same sweep as n_gt_4 (n in 5..7, k in {1, 10, 19, 28, 37}, m = 2), but
// with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31708
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2.
// Every case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
31727
// Sweeps n over {8, 12} (multiples of the nr(4) setting) and k over
// {1, 10, 19, 28, 37} with m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31743
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 2),
// but with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
31760
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2, running
// a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
31779
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
31794
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks = 3,
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
31814
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
31831
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
31848
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with
// cm_stride = 7, running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
31868
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks = 3 and
// a_offset = 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
31884
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over {0, 1} on the full
// 2x4 tile with ks = 3 and a_offset = 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
31903
// Single run on the full 2x4 tile with k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
31916
// Single run on the full 2x4 tile with k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
31929
// Single run on the full 2x4 tile with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
31942
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with
// a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
31957
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with
// b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
31972
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
31988 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31989
31990
31991 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on the full 2x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32003
// Single run on the full 2x4 tile with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32016
// Sweeps n over 1..4 and m over 1..2 with k = 8, running a single iteration
// per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32033
// Sweeps m over 1..2 with n = 4 and k = 8, running a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32048
// Sweeps n over 1..4 with m = 2 and k = 8, running a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32063
// Sweeps k over 1..7 on the full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32077
// Sweeps k over 1..7, n over 1..4 and m over 1..2, running a single iteration
// per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32096
// Sweeps k over 9..15 on the full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32110
// Sweeps k over 9..15, n over 1..4 and m over 1..2, running a single
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32129
// Sweeps k over 16, 24, ..., 80 on the full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32143
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..2, running a
// single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32162
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32178
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32195
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2, running a
// single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32214
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32230
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32247
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2, running
// a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32266
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32281
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks = 3,
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32301
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32318
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32335
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with
// cm_stride = 7, running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32355
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks = 3 and
// a_offset = 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32371
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over {0, 1} on the full
// 2x4 tile with ks = 3 and a_offset = 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32390
// Single run on the full 2x4 tile with k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32403
// Single run on the full 2x4 tile with k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32416
// Single run on the full 2x4 tile with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32429
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with
// a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32444
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with
// b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32459
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32475 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32476
32477
32478 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on the full 2x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32490
// Single run on the full 2x4 tile with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32503
// Sweeps n over 1..4 and m over 1..2 with k = 8, running a single iteration
// per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32520
// Sweeps m over 1..2 with n = 4 and k = 8, running a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32535
// Sweeps n over 1..4 with m = 2 and k = 8, running a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32550
// Sweeps k over 1..7 on the full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32564
// Sweeps k over 1..7, n over 1..4 and m over 1..2, running a single iteration
// per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32583
// Sweeps k over 9..15 on the full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32597
// Sweeps k over 9..15, n over 1..4 and m over 1..2, running a single
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32616
// Sweeps k over 16, 24, ..., 80 on the full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32630
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..2, running a
// single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32649
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32665
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32682
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2, running a
// single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32701
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32717
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32734
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..2, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32753
// Full 2x4 tile with ks(3), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32768
// ks(3) with k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3).iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32788
// ks(3) with n over 5..7 and k over 1, 10, 19, 28, 37, m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32805
// ks(3) with n over 8 and 12 and k over 1, 10, 19, 28, 37, m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32822
// cm_stride(7) with k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7).iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
32842
// Full 2x4 tile with ks(3) and a_offset(83), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .ks(3).a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32858
// Same setup as a_offset (ks(3), a_offset(83)), additionally sweeping zero_index over 0..1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(kc)
          .ks(3).a_offset(83).zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
32877
// Single full-tile run (m = 2, n = 4, k = 8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32890
// Single full-tile run (m = 2, n = 4, k = 8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32903
// Single full-tile run (m = 2, n = 4, k = 8) with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32916
// Full 2x4 tile with a_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32931
// Full 2x4 tile with b_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32946
// Full 2x4 tile with both a_zero_point(0) and b_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0).b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
32962 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32963
32964
32965 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single full-tile run: m = 3, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32977
// Single full-tile run (m = 3, n = 4, k = 8) with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
32990
// k = 8 with n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33007
// k = 8 and n = 4 with m over 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33022
// k = 8 and m = 3 with n over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33037
// Full 3x4 tile, sweeping k over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33051
// k over 1..7, n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33070
// Full 3x4 tile, sweeping k over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33084
// k over 9..15, n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33103
// Full 3x4 tile, sweeping k over 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33117
// k over 16, 24, ..., 80, n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33136
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33152
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m fixed at 3 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33169
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33188
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33204
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 3 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33221
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33240
// Full 3x4 tile with ks(3), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33255
// ks(3) with k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3).iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33275
// ks(3) with n over 5..7 and k over 1, 10, 19, 28, 37, m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33292
// ks(3) with n over 8 and 12 and k over 1, 10, 19, 28, 37, m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33309
// cm_stride(7) with k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7).iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33329
// Full 3x4 tile with ks(3) and a_offset(127), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3).a_offset(127)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33345
// Same setup as a_offset (ks(3), a_offset(127)), additionally sweeping zero_index over 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(kc)
          .ks(3).a_offset(127).zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33364
// Single full-tile run (m = 3, n = 4, k = 8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
33377
// Single full-tile run (m = 3, n = 4, k = 8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
33390
// Single full-tile run (m = 3, n = 4, k = 8) with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
33403
// Full 3x4 tile with a_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33418
// Full 3x4 tile with b_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33433
// Full 3x4 tile with both a_zero_point(0) and b_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0).b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33449 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33450
33451
33452 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single full-tile run: m = 3, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
33464
// Single full-tile run (m = 3, n = 4, k = 8) with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
33477
// k = 8 with n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33494
// k = 8 and n = 4 with m over 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33509
// k = 8 and m = 3 with n over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33524
// Full 3x4 tile, sweeping k over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33538
// k over 1..7, n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33557
// Full 3x4 tile, sweeping k over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33571
// k over 9..15, n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33590
// Full 3x4 tile, sweeping k over 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
33604
// k over 16, 24, ..., 80, n over 1..4 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33623
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33639
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m fixed at 3 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33656
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
33675
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33691
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 3 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
33708
// 3x4c2s4 ld64 ukernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, every m in [1, 3]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33727
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33742
// 3x4c2s4 ld64 ukernel: k in {1, 10, 19, 28, 37}, every n in [1, 4] and m in [1, 3], with ks(3);
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33762
// 3x4c2s4 ld64 ukernel: n in [5, 7], k in {1, 10, 19, 28, 37}, m = 3, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33779
// 3x4c2s4 ld64 ukernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 3, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33796
// 3x4c2s4 ld64 ukernel: k in {1, 10, 19, 28, 37}, every n in [1, 4] and m in [1, 3], with cm_stride(7);
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33816
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k in {1, 10, 19, 28, 37}, with ks(3) and a_offset(127).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33832
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k in {1, 10, 19, 28, 37}, with ks(3), a_offset(127) and
// zero_index stepping through 0, 1, 2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t mz_val = 0; mz_val < 3; ++mz_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(k_val)
          .ks(3)
          .a_offset(127)
          .zero_index(mz_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33851
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33864
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33877
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k = 8, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33890
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k in {1, 10, 19, 28, 37}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33905
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k in {1, 10, 19, 28, 37}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33920
// 3x4c2s4 ld64 ukernel: m = 3, n = 4, k in {1, 10, 19, 28, 37}, with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33936 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33937
33938
33939 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x4c2 ld128 ukernel: single configuration with m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33951
// 4x4c2 ld128 ukernel: m = 4, n = 4, k = 8, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33964
// 4x4c2 ld128 ukernel: k = 8, every n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33981
// 4x4c2 ld128 ukernel: k = 8, n = 4, every m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33996
// 4x4c2 ld128 ukernel: k = 8, m = 4, every n in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34011
// 4x4c2 ld128 ukernel: m = 4, n = 4, every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34025
// 4x4c2 ld128 ukernel: every k in [1, 7], n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34044
// 4x4c2 ld128 ukernel: m = 4, n = 4, every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34058
// 4x4c2 ld128 ukernel: every k in [9, 15], n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34077
// 4x4c2 ld128 ukernel: m = 4, n = 4, k from 16 to 80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34091
// 4x4c2 ld128 ukernel: k from 16 to 80 in steps of 8, every n in [1, 4] and m in [1, 4];
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34110
// 4x4c2 ld128 ukernel: n in [5, 7], k in {1, 10, 19, 28, 37}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34126
// 4x4c2 ld128 ukernel: n in [5, 7], k in {1, 10, 19, 28, 37}, m = 4, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34143
// 4x4c2 ld128 ukernel: n in [5, 7], k in {1, 10, 19, 28, 37}, every m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34162
// 4x4c2 ld128 ukernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34178
// 4x4c2 ld128 ukernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34195
// 4x4c2 ld128 ukernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, every m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34214
// 4x4c2 ld128 ukernel: m = 4, n = 4, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34229
// 4x4c2 ld128 ukernel: k in {1, 10, 19, 28, 37}, every n in [1, 4] and m in [1, 4], with ks(3);
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34249
// 4x4c2 ld128 ukernel: n in [5, 7], k in {1, 10, 19, 28, 37}, m = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34266
// 4x4c2 ld128 ukernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34283
// 4x4c2 ld128 ukernel: k in {1, 10, 19, 28, 37}, every n in [1, 4] and m in [1, 4], with cm_stride(7);
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34303
// 4x4c2 ld128 ukernel: m = 4, n = 4, k in {1, 10, 19, 28, 37}, with ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34319
// 4x4c2 ld128 ukernel: m = 4, n = 4, k in {1, 10, 19, 28, 37}, with ks(3), a_offset(163) and
// zero_index stepping through 0, 1, 2, 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t mz_val = 0; mz_val < 4; ++mz_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(mz_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34338
// 4x4c2 ld128 ukernel: m = 4, n = 4, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34351
// 4x4c2 ld128 ukernel: m = 4, n = 4, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34364
// 4x4c2 ld128 ukernel: m = 4, n = 4, k = 8, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34377
// 4x4c2 ld128 ukernel: m = 4, n = 4, k in {1, 10, 19, 28, 37}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34392
// 4x4c2 ld128 ukernel: m = 4, n = 4, k in {1, 10, 19, 28, 37}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34407
// 4x4c2 ld128 ukernel: m = 4, n = 4, k in {1, 10, 19, 28, 37}, with both a_zero_point(0) and
// b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34423 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34424
34425
34426 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x4c2s4 ld64 ukernel: single configuration with m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34438
// 4x4c2s4 ld64 ukernel: m = 4, n = 4, k = 8, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34451
// 4x4c2s4 ld64 ukernel: k = 8, every n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34468
// 4x4c2s4 ld64 ukernel: k = 8, n = 4, every m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34483
// 4x4c2s4 ld64 ukernel: k = 8, m = 4, every n in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34498
// 4x4c2s4 ld64 ukernel: m = 4, n = 4, every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34512
// 4x4c2s4 ld64 ukernel: every k in [1, 7], n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34531
// 4x4c2s4 ld64 ukernel: m = 4, n = 4, every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34545
// 4x4c2s4 ld64 ukernel: every k in [9, 15], n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34564
// 4x4c2s4 ld64 ukernel: m = 4, n = 4, k from 16 to 80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34578
// 4x4c2s4 ld64 ukernel: k from 16 to 80 in steps of 8, every n in [1, 4] and m in [1, 4];
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34597
// 4x4c2s4 ld64 ukernel: n in [5, 7], k in {1, 10, 19, 28, 37}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34613
// 4x4c2s4 ld64 ukernel: n in [5, 7], k in {1, 10, 19, 28, 37}, m = 4, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34630
// 4x4c2s4 ld64 ukernel: n in [5, 7], k in {1, 10, 19, 28, 37}, every m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34649
// 4x4c2s4 ld64 ukernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34665
// Sweeps n = 8 and n = 12 against k = 1, 10, 19, 28, 37 on a full m = 4 tile,
// with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34682
// Sweeps n = 8 and n = 12, k = 1, 10, 19, 28, 37 and m = 1..4, running a
// single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34701
// Runs the full 4x4 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34716
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with ks(3), running a
// single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34736
// Sweeps n = 5, 6, 7 against k = 1, 10, 19, 28, 37 on a full m = 4 tile,
// with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34753
// Sweeps n = 8 and n = 12 against k = 1, 10, 19, 28, 37 on a full m = 4 tile,
// with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34770
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with cm_stride(7),
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34790
// Runs the full 4x4 tile with ks(3) and a_offset(163) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34806
// Runs the full 4x4 tile with ks(3) and a_offset(163) for
// k = 1, 10, 19, 28, 37, trying each zero_index from 0 through 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34825
// Runs the full 4x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34838
// Runs the full 4x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34851
// Runs the full 4x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34864
// Runs the full 4x4 tile with a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34879
// Runs the full 4x4 tile with b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34894
// Runs the full 4x4 tile with both a_zero_point(0) and b_zero_point(0) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34910 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34911
34912
34913 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 4x4 tile at k = 8 with no extra tester options.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34925
// Runs the full 4x4 tile at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34938
// Sweeps n = 1..4 and m = 1..4 at k = 8, running a single iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34955
// Sweeps m = 1..4 with n = 4 and k = 8, running a single iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34970
// Sweeps n = 1..4 with m = 4 and k = 8, running a single iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34985
// Runs the full 4x4 tile for every k from 1 through 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34999
// Sweeps k = 1..7, n = 1..4 and m = 1..4, running a single iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35018
// Runs the full 4x4 tile for every k from 9 through 15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35032
// Sweeps k = 9..15, n = 1..4 and m = 1..4, running a single iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35051
// Runs the full 4x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35065
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..4, running a single
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35084
// Sweeps n = 5, 6, 7 against k = 1, 10, 19, 28, 37 on a full m = 4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35100
// Sweeps n = 5, 6, 7 against k = 1, 10, 19, 28, 37 on a full m = 4 tile,
// with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35117
// Sweeps n = 5, 6, 7, k = 1, 10, 19, 28, 37 and m = 1..4, running a single
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35136
// Sweeps n = 8 and n = 12 against k = 1, 10, 19, 28, 37 on a full m = 4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35152
// Sweeps n = 8 and n = 12 against k = 1, 10, 19, 28, 37 on a full m = 4 tile,
// with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35169
// Sweeps n = 8 and n = 12, k = 1, 10, 19, 28, 37 and m = 1..4, running a
// single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35188
// Runs the full 4x4 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35203
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with ks(3), running a
// single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35223
// Sweeps n = 5, 6, 7 against k = 1, 10, 19, 28, 37 on a full m = 4 tile,
// with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35240
// Sweeps n = 8 and n = 12 against k = 1, 10, 19, 28, 37 on a full m = 4 tile,
// with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35257
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with cm_stride(7),
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35277
// Runs the full 4x4 tile with ks(3) and a_offset(163) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35293
// Runs the full 4x4 tile with ks(3) and a_offset(163) for
// k = 1, 10, 19, 28, 37, trying each zero_index from 0 through 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35312
// Runs the full 4x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
35325
// Runs the full 4x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
35338
// Runs the full 4x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
35351
// Runs the full 4x4 tile with a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35366
// Runs the full 4x4 tile with b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35381
// Runs the full 4x4 tile with both a_zero_point(0) and b_zero_point(0) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35397 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35398
35399
35400 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 4x4 tile at k = 8 with no extra tester options.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
35412
// Runs the full 4x4 tile at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
35425
// Sweeps n = 1..4 and m = 1..4 at k = 8, running a single iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35442
// Sweeps m = 1..4 with n = 4 and k = 8, running a single iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35457
// Sweeps n = 1..4 with m = 4 and k = 8, running a single iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35472
// Runs the full 4x4 tile for every k from 1 through 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35486
// Sweeps k = 1..7, n = 1..4 and m = 1..4, running a single iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35505
// Runs the full 4x4 tile for every k from 9 through 15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35519
// Sweeps k = 9..15, n = 1..4 and m = 1..4, running a single iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35538
// Runs the full 4x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35552
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..4, running a single
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35571
// Sweeps n = 5, 6, 7 against k = 1, 10, 19, 28, 37 on a full m = 4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35587
// Sweeps n = 5, 6, 7 against k = 1, 10, 19, 28, 37 on a full m = 4 tile,
// with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35604
// Sweeps n = 5, 6, 7, k = 1, 10, 19, 28, 37 and m = 1..4, running a single
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35623
// Sweeps n = 8 and n = 12 against k = 1, 10, 19, 28, 37 on a full m = 4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35639
// Same sweep as n_div_4 (m=4, n in {8, 12}, k in {1, 10, 19, 28, 37}) with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35656
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
35675
// Runs the ld64 kernel with m=4, n=4, ks(3) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
35690
// Sweeps k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..4 with ks(3), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
35710
// Runs the ld64 kernel with m=4, ks(3), n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35727
// Runs the ld64 kernel with m=4, ks(3), n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35744
// Sweeps k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..4 with cm_stride(7), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
35764
// Runs the ld64 kernel with m=4, n=4, ks(3), a_offset(163) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
35780
// Sweeps k in {1, 10, 19, 28, 37} and zero_index in 0..3 with m=4, n=4, ks(3), a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35799
// Single run of the ld64 kernel: m=4, n=4, k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
35812
// Single run of the ld64 kernel: m=4, n=4, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
35825
// Single run of the ld64 kernel: m=4, n=4, k=8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
35838
// Runs the ld64 kernel with m=4, n=4, a_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
35853
// Runs the ld64 kernel with m=4, n=4, b_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
35868
// Runs the ld64 kernel with m=4, n=4, a_zero_point(0), b_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
35884 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35885
35886
35887 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the ld128 kernel: m=4, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
35899
// Single run of the ld128 kernel: m=4, n=4, k=8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
35912
// Sweeps n in 1..4 and m in 1..4 at k=8, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35929
// Sweeps m in 1..4 at n=4, k=8, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
35944
// Sweeps n in 1..4 at m=4, k=8, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
35959
// Runs the ld128 kernel with m=4, n=4 and k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
35973
// Sweeps k in 1..7, n in 1..4 and m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
35992
// Runs the ld128 kernel with m=4, n=4 and k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
36006
// Sweeps k in 9..15, n in 1..4 and m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36025
// Runs the ld128 kernel with m=4, n=4 and k in 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
36039
// Sweeps k in 16, 24, ..., 80, n in 1..4 and m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36058
// Runs the ld128 kernel with m=4, n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36074
// Same sweep as n_gt_4 (m=4, n in 5..7, k in {1, 10, 19, 28, 37}) with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36091
// Sweeps n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36110
// Runs the ld128 kernel with m=4, n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36126
// Same sweep as n_div_4 (m=4, n in {8, 12}, k in {1, 10, 19, 28, 37}) with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36143
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36162
// Runs the ld128 kernel with m=4, n=4, ks(3) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
36177
// Sweeps k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..4 with ks(3), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36197
// Runs the ld128 kernel with m=4, ks(3), n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36214
// Runs the ld128 kernel with m=4, ks(3), n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36231
// Sweeps k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..4 with cm_stride(7), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36251
// Runs the ld128 kernel with m=4, n=4, ks(3), a_offset(163) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
36267
// Sweeps k in {1, 10, 19, 28, 37} and zero_index in 0..3 with m=4, n=4, ks(3), a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36286
// Single run of the ld128 kernel: m=4, n=4, k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
36299
// Single run of the ld128 kernel: m=4, n=4, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
36312
// Single run of the ld128 kernel: m=4, n=4, k=8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
36325
// Runs the ld128 kernel with m=4, n=4, a_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
36340
// Runs the ld128 kernel with m=4, n=4, b_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
36355
// Runs the ld128 kernel with m=4, n=4, a_zero_point(0), b_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
36371 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
36372
36373
36374 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 1x2 wasm_fmagic kernel: m=1, n=2, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
36386
// Single run of the 1x2 wasm_fmagic kernel: m=1, n=2, k=1 with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
36399
// Sweeps n in 1..2 and m over the single value 1 at k=1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36416
// Loops m over the single value 1 at n=2, k=1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(m_dim).n(2).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
36431
// Sweeps n in 1..2 at m=1, k=1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
36446
// Runs the 1x2 wasm_fmagic kernel with m=1, n=2 and k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
36460
// Sweeps k in 2..9, n in 1..2 and m over the single value 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36479
// Runs the 1x2 wasm_fmagic kernel with m=1, n over the single value 3 and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36495
// Same sweep as n_gt_2 (m=1, n=3, k in {1, 3, 5}) with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36512
// Sweeps n over the single value 3, k in {1, 3, 5} and m over the single value 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36531
// Runs the 1x2 wasm_fmagic kernel with m=1, n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36547
// Same sweep as n_div_2 (m=1, n in {4, 6}, k in {1, 3, 5}) with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
36564
// Sweeps n in {4, 6}, k in {1, 3, 5} and m over the single value 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36583
// Runs the 1x2 wasm_fmagic kernel with m=1, n=2, ks(3) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
36598
// Sweeps k in {1, 3, 5}, n in 1..2 and m over the single value 1 with ks(3), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
36618
// 1x2 wasm_fmagic kernel: ks(3) with n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36635
// 1x2 wasm_fmagic kernel: ks(3) with n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36652
// 1x2 wasm_fmagic kernel: cm_stride(5) with k in {1, 3, 5}, n in [1, 2], m in [1, 1]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36672
// 1x2 wasm_fmagic kernel: full tile with ks(3) and a_offset(7), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .ks(3)
        .a_offset(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36688
// 1x2 wasm_fmagic kernel: ks(3), a_offset(7) and zero_index over [0, 1), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(2).k(depth)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36707
// 1x2 wasm_fmagic kernel: single 1x2 tile, k = 1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
36720
// 1x2 wasm_fmagic kernel: single 1x2 tile, k = 1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
36733
// 1x2 wasm_fmagic kernel: single 1x2 tile, k = 1, with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
36746
// 1x2 wasm_fmagic kernel: full tile with a_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36761
// 1x2 wasm_fmagic kernel: full tile with b_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36776
// 1x2 wasm_fmagic kernel: full tile with a_zero_point(0) and b_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36792 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
36793
36794
36795 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x4 wasm_fmagic kernel: single 1x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
36807
// 1x4 wasm_fmagic kernel: single 1x4 tile, k = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
36820
// 1x4 wasm_fmagic kernel: k = 1 with n in [1, 4] and m in [1, 1]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36837
// 1x4 wasm_fmagic kernel: k = 1, n = 4, with m in [1, 1]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(rows).n(4).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36852
// 1x4 wasm_fmagic kernel: k = 1, m = 1, with n in [1, 4]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36867
// 1x4 wasm_fmagic kernel: full 1x4 tile with k in [2, 10).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36881
// 1x4 wasm_fmagic kernel: k in [2, 10), n in [1, 4], m in [1, 1]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36900
// 1x4 wasm_fmagic kernel: n in [5, 8) and k in {1, 3, 5} with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36916
// 1x4 wasm_fmagic kernel: cn_stride(7) with n in [5, 8) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36933
// 1x4 wasm_fmagic kernel: n in [5, 8), k in {1, 3, 5}, m in [1, 1]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36952
// 1x4 wasm_fmagic kernel: n in {8, 12} and k in {1, 3, 5} with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36968
// 1x4 wasm_fmagic kernel: cn_stride(7) with n in {8, 12} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36985
// 1x4 wasm_fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m in [1, 1]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37004
// 1x4 wasm_fmagic kernel: full 1x4 tile with ks(3), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37019
// 1x4 wasm_fmagic kernel: ks(3) with k in {1, 3, 5}, n in [1, 4], m in [1, 1]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37039
// 1x4 wasm_fmagic kernel: ks(3) with n in [5, 8) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37056
// 1x4 wasm_fmagic kernel: ks(3) with n in {8, 12} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37073
// 1x4 wasm_fmagic kernel: cm_stride(7) with k in {1, 3, 5}, n in [1, 4], m in [1, 1]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37093
// 1x4 wasm_fmagic kernel: full tile with ks(3) and a_offset(7), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37109
// 1x4 wasm_fmagic kernel: ks(3), a_offset(7) and zero_index over [0, 1), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(depth)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37128
// 1x4 wasm_fmagic kernel: single 1x4 tile, k = 1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37141
// 1x4 wasm_fmagic kernel: single 1x4 tile, k = 1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37154
// 1x4 wasm_fmagic kernel: single 1x4 tile, k = 1, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37167
// 1x4 wasm_fmagic kernel: full tile with a_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37182
// 1x4 wasm_fmagic kernel: full tile with b_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37197
// 1x4 wasm_fmagic kernel: full tile with a_zero_point(0) and b_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37213 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37214
37215
37216 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 2x2 wasm_fmagic kernel: single 2x2 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37228
// 2x2 wasm_fmagic kernel: single 2x2 tile, k = 1, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37241
// 2x2 wasm_fmagic kernel: k = 1 with n in [1, 2] and m in [1, 2]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37258
// 2x2 wasm_fmagic kernel: k = 1, n = 2, with m in [1, 2]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(rows).n(2).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37273
// 2x2 wasm_fmagic kernel: k = 1, m = 2, with n in [1, 2]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37288
// 2x2 wasm_fmagic kernel: full 2x2 tile with k in [2, 10).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37302
// 2x2 wasm_fmagic kernel: k in [2, 10), n in [1, 2], m in [1, 2]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37321
// 2x2 wasm_fmagic kernel: n = 3 (the only value in [3, 4)) and k in {1, 3, 5} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37337
// 2x2 wasm_fmagic kernel: cn_stride(5) with n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(5)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37354
// 2x2 wasm_fmagic kernel: n = 3 (the only value in [3, 4)), k in {1, 3, 5}, m in [1, 2]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37373
// 2x2 wasm_fmagic kernel: n in {4, 6} and k in {1, 3, 5} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37389
// 2x2 wasm_fmagic kernel: cn_stride(5) with n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(5)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37406
// 2x2 wasm_fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m in [1, 2]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37425
// 2x2 wasm_fmagic kernel: full 2x2 tile with ks(3), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37440
// 2x2 wasm_fmagic kernel: ks(3) with k in {1, 3, 5}, n in [1, 2], m in [1, 2]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37460
// 2x2 wasm_fmagic kernel: ks(3) with n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37477
// 2x2 wasm_fmagic kernel: ks(3) with n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37494
// 2x2 wasm_fmagic kernel: cm_stride(5) with k in {1, 3, 5}, n in [1, 2], m in [1, 2]; iterations(1) per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37514
// 2x2 wasm_fmagic kernel: full tile with ks(3) and a_offset(13), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .ks(3)
        .a_offset(13)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37530
// 2x2 wasm_fmagic kernel: ks(3), a_offset(13) and zero_index over [0, 2), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(2).k(depth)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37549
// 2x2 wasm_fmagic kernel: single 2x2 tile, k = 1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37562
// 2x2 wasm_fmagic kernel: single 2x2 tile, k = 1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37575
// 2x2 wasm_fmagic kernel: single 2x2 tile, k = 1, with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37588
// 2x2 wasm_fmagic kernel: full tile with a_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37603
// 2x2 wasm_fmagic kernel: full tile with b_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37618
// 2x2 wasm_fmagic kernel: full tile with a_zero_point(0) and b_zero_point(0), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37634 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37635
37636
37637 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 2x4 wasm_fmagic kernel: single 2x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37649
// 2x4 wasm_fmagic kernel: single 2x4 tile, k = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37662
// 2x4 wasm_fmagic kernel: k = 1 for every (m, n) with m in [1, 2] and
// n in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37679
// 2x4 wasm_fmagic kernel: k = 1 and n = 4 while m sweeps 1..2;
// iterations(1) is set for each m.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37694
// 2x4 wasm_fmagic kernel: k = 1 and m = 2 while n sweeps 1..4;
// iterations(1) is set for each n.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37709
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37723
// 2x4 wasm_fmagic kernel: every k in [2, 9] crossed with every m in [1, 2]
// and n in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37742
// 2x4 wasm_fmagic kernel: n larger than nr (n = 5, 6, 7) with m = 2, for
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37758
// 2x4 wasm_fmagic kernel: n = 5, 6, 7 with m = 2 and k = 1, 3, 5, with
// cn_stride overridden to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37775
// 2x4 wasm_fmagic kernel: n = 5, 6, 7 and k = 1, 3, 5 crossed with every
// m in [1, 2]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37794
// 2x4 wasm_fmagic kernel: n a multiple of nr (n = 8, 12) with m = 2, for
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37810
// 2x4 wasm_fmagic kernel: n = 8, 12 with m = 2 and k = 1, 3, 5, with
// cn_stride overridden to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37827
// 2x4 wasm_fmagic kernel: n = 8, 12 and k = 1, 3, 5 crossed with every
// m in [1, 2]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37846
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) for k = 1, 3, 5 with
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37861
// 2x4 wasm_fmagic kernel: ks = 3 and k = 1, 3, 5 crossed with every
// m in [1, 2] and n in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37881
// 2x4 wasm_fmagic kernel: n = 5, 6, 7 with m = 2 and k = 1, 3, 5, with
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37898
// 2x4 wasm_fmagic kernel: n = 8, 12 with m = 2 and k = 1, 3, 5, with
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37915
// 2x4 wasm_fmagic kernel: cm_stride = 7 and k = 1, 3, 5 crossed with every
// m in [1, 2] and n in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37935
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) for k = 1, 3, 5 with
// ks = 3 and a_offset = 13.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37951
// 2x4 wasm_fmagic kernel: full tile for k = 1, 3, 5 with ks = 3 and
// a_offset = 13, repeated with zero_index set to each mz in [0, 2).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(13)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37970
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) with k = 1 and qmin
// set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
37983
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) with k = 1 and qmax
// set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
37996
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) with k = 1 and cm_stride
// overridden to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38009
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) for k = 1, 3, 5 with
// a_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38024
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) for k = 1, 3, 5 with
// b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38039
// 2x4 wasm_fmagic kernel: full tile (m = 2, n = 4) for k = 1, 3, 5 with
// both a_zero_point and b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38055 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38056
38057
38058 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) with k = 1 and no other
// tester option overridden.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38070
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) with k = 1 and cn_stride
// overridden to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38083
// 4x2 wasm_fmagic kernel: k = 1 for every (m, n) with m in [1, 4] and
// n in [1, 2]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38100
// 4x2 wasm_fmagic kernel: k = 1 and n = 2 while m sweeps 1..4;
// iterations(1) is set for each m.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38115
// 4x2 wasm_fmagic kernel: k = 1 and m = 4 while n sweeps 1..2;
// iterations(1) is set for each n.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38130
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38144
// 4x2 wasm_fmagic kernel: every k in [2, 9] crossed with every m in [1, 4]
// and n in [1, 2]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38163
// 4x2 wasm_fmagic kernel: n larger than nr with m = 4, for k = 1, 3, 5.
// The loop bounds (3 <= n < 4) yield the single value n = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38179
// 4x2 wasm_fmagic kernel: n = 3 (the only value in 3 <= n < 4) with m = 4
// and k = 1, 3, 5, with cn_stride overridden to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38196
// 4x2 wasm_fmagic kernel: n = 3 (the only value in 3 <= n < 4) and
// k = 1, 3, 5 crossed with every m in [1, 4]; iterations(1) is set for each
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38215
// 4x2 wasm_fmagic kernel: n a multiple of nr (n = 4, 6) with m = 4, for
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38231
// 4x2 wasm_fmagic kernel: n = 4, 6 with m = 4 and k = 1, 3, 5, with
// cn_stride overridden to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38248
// 4x2 wasm_fmagic kernel: n = 4, 6 and k = 1, 3, 5 crossed with every
// m in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38267
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) for k = 1, 3, 5 with
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38282
// 4x2 wasm_fmagic kernel: ks = 3 and k = 1, 3, 5 crossed with every
// m in [1, 4] and n in [1, 2]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38302
// 4x2 wasm_fmagic kernel: n = 3 (the only value in 3 <= n < 4) with m = 4
// and k = 1, 3, 5, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38319
// 4x2 wasm_fmagic kernel: n = 4, 6 with m = 4 and k = 1, 3, 5, with
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38336
// 4x2 wasm_fmagic kernel: cm_stride = 5 and k = 1, 3, 5 crossed with every
// m in [1, 4] and n in [1, 2]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38356
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) for k = 1, 3, 5 with
// ks = 3 and a_offset = 23.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38372
// 4x2 wasm_fmagic kernel: full tile for k = 1, 3, 5 with ks = 3 and
// a_offset = 23, repeated with zero_index set to each mz in [0, 4).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38391
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) with k = 1 and qmin
// set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38404
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) with k = 1 and qmax
// set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38417
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) with k = 1 and cm_stride
// overridden to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38430
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) for k = 1, 3, 5 with
// a_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38445
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) for k = 1, 3, 5 with
// b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38460
// 4x2 wasm_fmagic kernel: full tile (m = 4, n = 2) for k = 1, 3, 5 with
// both a_zero_point and b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38476 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38477
38478
38479 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x4 wasm_fmagic kernel: full tile (m = 4, n = 4) with k = 1 and no other
// tester option overridden.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38491
// 4x4 wasm_fmagic kernel: full tile (m = 4, n = 4) with k = 1 and cn_stride
// overridden to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38504
// 4x4 wasm_fmagic kernel: k = 1 for every (m, n) with m in [1, 4] and
// n in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38521
// 4x4 wasm_fmagic kernel: k = 1 and n = 4 while m sweeps 1..4;
// iterations(1) is set for each m.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38536
// 4x4 wasm_fmagic kernel: k = 1 and m = 4 while n sweeps 1..4;
// iterations(1) is set for each n.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38551
// 4x4 wasm_fmagic kernel: full tile (m = 4, n = 4) for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38565
// 4x4 wasm_fmagic kernel: every k in [2, 9] crossed with every m in [1, 4]
// and n in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38584
// 4x4 wasm_fmagic kernel: n larger than nr (n = 5, 6, 7) with m = 4, for
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38600
// 4x4 wasm_fmagic kernel: n = 5, 6, 7 with m = 4 and k = 1, 3, 5, with
// cn_stride overridden to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38617
// 4x4 wasm_fmagic kernel: n = 5, 6, 7 and k = 1, 3, 5 crossed with every
// m in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38636
// 4x4 wasm_fmagic kernel: n a multiple of nr (n = 8, 12) with m = 4, for
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38652
// 4x4 wasm_fmagic kernel: n = 8, 12 with m = 4 and k = 1, 3, 5, with
// cn_stride overridden to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38669
// 4x4 wasm_fmagic kernel: n = 8, 12 and k = 1, 3, 5 crossed with every
// m in [1, 4]; iterations(1) is set for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38688
// Runs m = 4, n = 4 with ks(3) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38703
// Sweeps k in {1, 3, 5}, n in 1..4 and m in 1..4 with ks(3), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
38723
// Runs m = 4 with ks(3) for n in 5..7 and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38740
// Runs m = 4 with ks(3) for n in {8, 12} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38757
// Sweeps k in {1, 3, 5}, n in 1..4 and m in 1..4 with cm_stride(7), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
38777
// Runs m = 4, n = 4 with ks(3) and a_offset(23) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38793
// Runs m = 4, n = 4 with ks(3) and a_offset(23), trying zero_index 0..3 for each k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(kc);
      tester.ks(3).a_offset(23).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38812
// Single run with m = 4, n = 4, k = 1 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38825
// Single run with m = 4, n = 4, k = 1 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38838
// Single run with m = 4, n = 4, k = 1 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38851
// Runs m = 4, n = 4 with a_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38866
// Runs m = 4, n = 4 with b_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38881
// Runs m = 4, n = 4 with both a_zero_point(0) and b_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38897 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38898
38899
// Single run of the 1x2 scalar fmagic kernel with m = 1, n = 2, k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38911
// Single run with m = 1, n = 2, k = 1 and cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38924
// Sweeps n in 1..2 and m in 1..1 at k = 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38941
// Sweeps m in 1..1 with n = 2 and k = 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(mc).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38956
// Sweeps n in 1..2 with m = 1 and k = 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38971
// Runs m = 1, n = 2 for every k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38985
// Sweeps k in 2..9, n in 1..2 and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39004
// Runs m = 1 for n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39020
// Runs m = 1 with cn_stride(5) for n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39037
// Sweeps n = 3, k in {1, 3, 5} and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39056
// Runs m = 1 for n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39072
// Runs m = 1 with cn_stride(5) for n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39089
// Sweeps n in {4, 6}, k in {1, 3, 5} and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39108
// Runs m = 1, n = 2 with ks(3) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
39123
// Sweeps k in {1, 3, 5}, n in 1..2 and m in 1..1 with ks(3), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39143
// Runs m = 1 with ks(3) for n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39160
// Runs m = 1 with ks(3) for n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39177
// Sweeps k in {1, 3, 5}, n in 1..2 and m in 1..1 with cm_stride(5), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39197
// Runs m = 1, n = 2 with ks(3) and a_offset(7) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.ks(3).a_offset(7);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
39213
// Runs m = 1, n = 2 with ks(3), a_offset(7) and zero_index 0 (the only value in [0, 1)) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(2).k(kc);
      tester.ks(3).a_offset(7).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39232
// Single run with m = 1, n = 2, k = 1 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
39245
// Single run with m = 1, n = 2, k = 1 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
39258
// Single run with m = 1, n = 2, k = 1 and cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
39271
// Runs m = 1, n = 2 with a_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
39286
// Runs m = 1, n = 2 with b_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
39301
// Runs m = 1, n = 2 with both a_zero_point(0) and b_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
39317
39318
// Single run of the 1x2 scalar lrintf kernel with m = 1, n = 2, k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
39330
// Single run with m = 1, n = 2, k = 1 and cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
39343
// Sweeps n in 1..2 and m in 1..1 at k = 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39360
// Sweeps m in 1..1 with n = 2 and k = 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(mc).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
39375
// Sweeps n in 1..2 with m = 1 and k = 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
39390
// Runs m = 1, n = 2 for every k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
39404
// Sweeps k in 2..9, n in 1..2 and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39423
// Runs m = 1 for n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39439
// Runs m = 1 with cn_stride(5) for n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39456
// Sweeps n = 3, k in {1, 3, 5} and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39475
// Runs m = 1 for n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39491
// Runs m = 1 with cn_stride(5) for n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39508
// Sweeps n in {4, 6}, k in {1, 3, 5} and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39527
// Runs m = 1, n = 2 with ks(3) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
39542
// Sweeps k in {1, 3, 5}, n in 1..2 and m in 1..1 with ks(3), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39562
// Runs m = 1 with ks(3) for n = 3 (the only value in [3, 4)) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39579
// Runs m = 1 with ks(3) for n in {4, 6} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39596
// Sweeps k in {1, 3, 5}, n in 1..2 and m in 1..1 with cm_stride(5), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
39616
// Runs m = 1, n = 2 with ks(3) and a_offset(7) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.ks(3).a_offset(7);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
39632
// Runs m = 1, n = 2 with ks(3), a_offset(7) and zero_index 0 (the only value in [0, 1)) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(2).k(kc);
      tester.ks(3).a_offset(7).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
39651
// Single run with m = 1, n = 2, k = 1 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
39664
// Single run with m = 1, n = 2, k = 1 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
39677
// Single run with m = 1, n = 2, k = 1 and cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
39690
// Runs m = 1, n = 2 with a_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
39705
// Runs m = 1, n = 2 with b_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
39720
// Runs the 1x2 scalar lrintf micro-kernel for k in {1, 3, 5} with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
39736
39737
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
39749
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4, k=1 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
39762
// Runs the 1x4 scalar fmagic micro-kernel at k=1 for every n in [1, 4] and
// m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc < 5; ++nc) {
    for (uint32_t mc = 1; mc < 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
39779
// Runs the 1x4 scalar fmagic micro-kernel at k=1, n=4 for every m in [1, 1],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc < 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
39794
// Runs the 1x4 scalar fmagic micro-kernel at k=1, m=1 for every n in [1, 4],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc < 5; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
39809
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4 for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
39823
// Runs the 1x4 scalar fmagic micro-kernel for every k in [2, 9], n in [1, 4]
// and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
39842
// Runs the 1x4 scalar fmagic micro-kernel with m=1 for every n in [5, 7] and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
39858
// Runs the 1x4 scalar fmagic micro-kernel with m=1 and cn_stride(7) for every
// n in [5, 7] and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
39875
// Runs the 1x4 scalar fmagic micro-kernel for every n in [5, 7], k in
// {1, 3, 5} and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
39894
// Runs the 1x4 scalar fmagic micro-kernel with m=1 for n in {8, 12} and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
39910
// Runs the 1x4 scalar fmagic micro-kernel with m=1 and cn_stride(7) for n in
// {8, 12} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
39927
// Runs the 1x4 scalar fmagic micro-kernel for n in {8, 12}, k in {1, 3, 5}
// and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
39946
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4 and ks(3) for
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
39961
// Runs the 1x4 scalar fmagic micro-kernel with ks(3) for k in {1, 3, 5},
// n in [1, 4] and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
39981
// Runs the 1x4 scalar fmagic micro-kernel with m=1 and ks(3) for every
// n in [5, 7] and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
39998
// Runs the 1x4 scalar fmagic micro-kernel with m=1 and ks(3) for n in {8, 12}
// and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40015
// Runs the 1x4 scalar fmagic micro-kernel with cm_stride(7) for k in
// {1, 3, 5}, n in [1, 4] and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40035
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4, ks(3) and
// a_offset(7) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, a_offset) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(7)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40051
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4, ks(3) and
// a_offset(7) for k in {1, 3, 5}, passing each index in [0, 0] to
// zero_index().
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, zero) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t zi = 0; zi != 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(7)
          .zero_index(zi)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40070
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4, k=1 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40083
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4, k=1 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40096
// Runs the 1x4 scalar fmagic micro-kernel with m=1, n=4, k=1 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40109
// Runs the 1x4 scalar fmagic micro-kernel for k in {1, 3, 5} with
// a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40124
// Runs the 1x4 scalar fmagic micro-kernel for k in {1, 3, 5} with
// b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40139
// Runs the 1x4 scalar fmagic micro-kernel for k in {1, 3, 5} with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40155
40156
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
40168
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4, k=1 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
40181
// Runs the 1x4 scalar lrintf micro-kernel at k=1 for every n in [1, 4] and
// m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc < 5; ++nc) {
    for (uint32_t mc = 1; mc < 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40198
// Runs the 1x4 scalar lrintf micro-kernel at k=1, n=4 for every m in [1, 1],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc < 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40213
// Runs the 1x4 scalar lrintf micro-kernel at k=1, m=1 for every n in [1, 4],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc < 5; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40228
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4 for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40242
// Runs the 1x4 scalar lrintf micro-kernel for every k in [2, 9], n in [1, 4]
// and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40261
// Runs the 1x4 scalar lrintf micro-kernel with m=1 for every n in [5, 7] and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40277
// Runs the 1x4 scalar lrintf micro-kernel with m=1 and cn_stride(7) for every
// n in [5, 7] and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40294
// Runs the 1x4 scalar lrintf micro-kernel for every n in [5, 7], k in
// {1, 3, 5} and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40313
// Runs the 1x4 scalar lrintf micro-kernel with m=1 for n in {8, 12} and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40329
// Runs the 1x4 scalar lrintf micro-kernel with m=1 and cn_stride(7) for n in
// {8, 12} and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40346
// Runs the 1x4 scalar lrintf micro-kernel for n in {8, 12}, k in {1, 3, 5}
// and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40365
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4 and ks(3) for
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40380
// Runs the 1x4 scalar lrintf micro-kernel with ks(3) for k in {1, 3, 5},
// n in [1, 4] and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40400
// Runs the 1x4 scalar lrintf micro-kernel with m=1 and ks(3) for every
// n in [5, 7] and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40417
// Runs the 1x4 scalar lrintf micro-kernel with m=1 and ks(3) for n in {8, 12}
// and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc < 13; nc += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40434
// Runs the 1x4 scalar lrintf micro-kernel with cm_stride(7) for k in
// {1, 3, 5}, n in [1, 4] and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t nc = 1; nc < 5; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40454
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4, ks(3) and
// a_offset(7) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(7)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40470
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4, ks(3) and
// a_offset(7) for k in {1, 3, 5}, passing each index in [0, 0] to
// zero_index().
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t zi = 0; zi != 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(7)
          .zero_index(zi)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40489
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4, k=1 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
40502
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4, k=1 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
40515
// Runs the 1x4 scalar lrintf micro-kernel with m=1, n=4, k=1 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
40528
// Runs the 1x4 scalar lrintf micro-kernel for k in {1, 3, 5} with
// a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40543
// Runs the 1x4 scalar lrintf micro-kernel for k in {1, 3, 5} with
// b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40558
// Runs the 1x4 scalar lrintf micro-kernel for k in {1, 3, 5} with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40574
40575
// Runs the 2x2 scalar fmagic micro-kernel with m=2, n=2, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40587
// Runs the 2x2 scalar fmagic micro-kernel with m=2, n=2, k=1 and cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40600
// Runs the 2x2 scalar fmagic micro-kernel at k=1 for every n in [1, 2] and
// m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc < 3; ++nc) {
    for (uint32_t mc = 1; mc < 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40617
// Runs the 2x2 scalar fmagic micro-kernel at k=1, n=2 for every m in [1, 2],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc < 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40632
// Runs the 2x2 scalar fmagic micro-kernel at k=1, m=2 for every n in [1, 2],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc < 3; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40647
// Runs the 2x2 scalar fmagic micro-kernel with m=2, n=2 for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40661
// Runs the 2x2 scalar fmagic micro-kernel for every k in [2, 9], n in [1, 2]
// and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t nc = 1; nc < 3; ++nc) {
      for (uint32_t mc = 1; mc < 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40680
// Runs the 2x2 scalar fmagic micro-kernel with m=2 for n in [3, 3] and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40696
// Runs the 2x2 scalar fmagic micro-kernel with m=2 and cn_stride(5) for
// n in [3, 3] and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40713
// Runs the 2x2 scalar fmagic micro-kernel for n in [3, 3], k in {1, 3, 5}
// and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t mc = 1; mc < 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40732
// Runs the 2x2 scalar fmagic micro-kernel with m=2 for n in {4, 6} and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc < 7; nc += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40748
// n_div_2_strided_cn: m = 2, n = 4 and 6, cn_stride = 5, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40765
// n_div_2_subtile: n = 4 and 6, k = 1, 3, 5 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40784
// small_kernel: m = 2, n = 2 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40799
// small_kernel_subtile: ks = 3 over k = 1, 3, 5, n = 1..2 and m = 1..2,
// one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 3; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40819
// n_gt_2_small_kernel: m = 2, n = 3 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40836
// n_div_2_small_kernel: m = 2, n = 4 and 6 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40853
// strided_cm_subtile: cm_stride = 5 over k = 1, 3, 5, n = 1..2 and m = 1..2,
// one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 3; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40873
// a_offset: m = 2, n = 2 with ks = 3 and a_offset = 13, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .ks(3)
        .a_offset(13)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40889
// zero: sweeps zero_index over 0 and 1 with ks = 3 and a_offset = 13,
// for k = 1, 3, 5 on an m = 2, n = 2 problem.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, zero) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(2).k(k_val)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40908
// qmin: m = 2, n = 2, k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40921
// qmax: m = 2, n = 2, k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40934
// strided_cm: m = 2, n = 2, k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40947
// no_a_zero_point: m = 2, n = 2 with a_zero_point = 0, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40962
// no_b_zero_point: m = 2, n = 2 with b_zero_point = 0, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40977
// no_zero_point: m = 2, n = 2 with both a_zero_point and b_zero_point = 0,
// for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40993
40994
// k_eq_1: m = 2, n = 2 with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
41006
// strided_cn: m = 2, n = 2, k = 1 with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
41019
// k_eq_1_subtile: k = 1 over n = 1..2 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val < 3; ++n_val) {
    for (uint32_t m_val = 1; m_val < 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41036
// k_eq_1_subtile_m: k = 1, n = 2 over m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val < 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(m_val).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41051
// k_eq_1_subtile_n: k = 1, m = 2 over n = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val < 3; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41066
// k_gt_1: m = 2, n = 2 for every k from 2 through 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41080
// k_gt_1_subtile: k = 2..9 over n = 1..2 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val < 3; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41099
// n_gt_2: m = 2 with n = 3 (above nr = 2), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41115
// n_gt_2_strided_cn: m = 2, n = 3, cn_stride = 5, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41132
// n_gt_2_subtile: n = 3, k = 1, 3, 5 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41151
// n_div_2: m = 2 with n = 4 and 6 (multiples of nr = 2), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41167
// n_div_2_strided_cn: m = 2, n = 4 and 6, cn_stride = 5, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41184
// n_div_2_subtile: n = 4 and 6, k = 1, 3, 5 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41203
// small_kernel: m = 2, n = 2 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41218
// small_kernel_subtile: ks = 3 over k = 1, 3, 5, n = 1..2 and m = 1..2,
// one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 3; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41238
// n_gt_2_small_kernel: m = 2, n = 3 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41255
// n_div_2_small_kernel: m = 2, n = 4 and 6 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41272
// strided_cm_subtile: cm_stride = 5 over k = 1, 3, 5, n = 1..2 and m = 1..2,
// one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 3; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41292
// a_offset: m = 2, n = 2 with ks = 3 and a_offset = 13, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .ks(3)
        .a_offset(13)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41308
// zero: sweeps zero_index over 0 and 1 with ks = 3 and a_offset = 13,
// for k = 1, 3, 5 on an m = 2, n = 2 problem.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(2).k(k_val)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41327
// qmin: m = 2, n = 2, k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
41340
// qmax: m = 2, n = 2, k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
41353
// strided_cm: m = 2, n = 2, k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
41366
// no_a_zero_point: m = 2, n = 2 with a_zero_point = 0, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41381
// no_b_zero_point: m = 2, n = 2 with b_zero_point = 0, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41396
// no_zero_point: m = 2, n = 2 with both a_zero_point and b_zero_point = 0,
// for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41412
41413
// k_eq_1: m = 2, n = 4 with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
41425
// strided_cn: m = 2, n = 4, k = 1 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
41438
// k_eq_1_subtile: k = 1 over n = 1..4 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    for (uint32_t m_val = 1; m_val < 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41455
// k_eq_1_subtile_m: k = 1, n = 4 over m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val < 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_val).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
41470
// k_eq_1_subtile_n: k = 1, m = 2 over n = 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
41485
// k_gt_1: m = 2, n = 4 for every k from 2 through 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
41499
// k_gt_1_subtile: k = 2..9 over n = 1..4 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41518
// n_gt_4: m = 2 with n = 5..7 (above nr = 4), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41534
// n_gt_4_strided_cn: m = 2, n = 5..7, cn_stride = 7, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41551
// n_gt_4_subtile: n = 5..7, k = 1, 3, 5 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41570
// n_div_4: m = 2 with n = 8 and 12 (multiples of nr = 4), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41586
// n_div_4_strided_cn: m = 2, n = 8 and 12, cn_stride = 7, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41603
// n_div_4_subtile: n = 8 and 12, k = 1, 3, 5 and m = 1..2, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41622
// small_kernel: m = 2, n = 4 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
41637
// small_kernel_subtile: ks = 3 over k = 1, 3, 5, n = 1..4 and m = 1..2,
// one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41657
// n_gt_4_small_kernel: m = 2, n = 5..7 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41674
// n_div_4_small_kernel: m = 2, n = 8 and 12 with ks = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41691
// strided_cm_subtile: cm_stride = 7 over k = 1, 3, 5, n = 1..4 and m = 1..2,
// one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41711
// a_offset: m = 2, n = 4 with ks = 3 and a_offset = 13, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(13)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
41727
// zero: sweeps zero_index over 0 and 1 with ks = 3 and a_offset = 13,
// for k = 1, 3, 5 on an m = 2, n = 4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, zero) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(4).k(k_val)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41746
// qmin: m = 2, n = 4, k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
41759
// qmax: m = 2, n = 4, k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
41772
// strided_cm: m = 2, n = 4, k = 1 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
41785
// 2x4 scalar_fmagic ukernel: m=mr=2, n=nr=4, a_zero_point(0), for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41800
// 2x4 scalar_fmagic ukernel: m=mr=2, n=nr=4, b_zero_point(0), for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41815
// 2x4 scalar_fmagic ukernel: a_zero_point(0) and b_zero_point(0) together, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41831
41832
// 2x4 scalar_lrintf ukernel: single run with m=mr=2, n=nr=4, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
41844
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, k=1, with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
41857
// 2x4 scalar_lrintf ukernel: k=1, every (n, m) with n in 1..4 and m in 1..2, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
41874
// 2x4 scalar_lrintf ukernel: k=1, n=nr=4, m swept over 1..2, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size < 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(m_size).n(4).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41889
// 2x4 scalar_lrintf ukernel: k=1, m=mr=2, n swept over 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41904
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, k swept over 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41918
// 2x4 scalar_lrintf ukernel: k in 2..9, n in 1..4, m in 1..2, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41937
// 2x4 scalar_lrintf ukernel: m=mr=2, n in 5..7, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
41953
// 2x4 scalar_lrintf ukernel: m=mr=2, n in 5..7, k in {1, 3, 5}, cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
41970
// 2x4 scalar_lrintf ukernel: n in 5..7, k in {1, 3, 5}, m in 1..2, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41989
// 2x4 scalar_lrintf ukernel: m=mr=2, n in {8, 12}, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42005
// 2x4 scalar_lrintf ukernel: m=mr=2, n in {8, 12}, k in {1, 3, 5}, cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42022
// 2x4 scalar_lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, m in 1..2, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42041
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, ks=3, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42056
// 2x4 scalar_lrintf ukernel: ks=3, k in {1, 3, 5}, n in 1..4, m in 1..2, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42076
// 2x4 scalar_lrintf ukernel: m=mr=2, ks=3, n in 5..7, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42093
// 2x4 scalar_lrintf ukernel: m=mr=2, ks=3, n in {8, 12}, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42110
// 2x4 scalar_lrintf ukernel: cm_stride(7), k in {1, 3, 5}, n in 1..4, m in 1..2, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42130
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, ks=3, a_offset=13, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42146
// 2x4 scalar_lrintf ukernel: ks=3, a_offset=13, zero_index swept over {0, 1}, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(13)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42165
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, k=1, with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
42178
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, k=1, with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
42191
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, k=1, with cm_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
42204
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, a_zero_point(0), for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42219
// 2x4 scalar_lrintf ukernel: m=mr=2, n=nr=4, b_zero_point(0), for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42234
// 2x4 scalar_lrintf ukernel: a_zero_point(0) and b_zero_point(0) together, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42250
42251
// 3x2 scalar_imagic ukernel: single run with m=mr=3, n=nr=2, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42263
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, k=1, with cn_stride(5) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42276
// 3x2 scalar_imagic ukernel: k=1, every (n, m) with n in 1..2 and m in 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size < 3; ++n_size) {
    for (uint32_t m_size = 1; m_size < 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42293
// 3x2 scalar_imagic ukernel: k=1, n=nr=2, m swept over 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size < 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(m_size).n(2).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42308
// 3x2 scalar_imagic ukernel: k=1, m=mr=3, n swept over 1..2, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size < 3; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42323
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, k swept over 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42337
// 3x2 scalar_imagic ukernel: k in 2..9, n in 1..2, m in 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size < 3; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42356
// 3x2 scalar_imagic ukernel: m=mr=3, n=3 (the only value in the n loop), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42372
// 3x2 scalar_imagic ukernel: m=mr=3, n=3 (the only value in the n loop), k in {1, 3, 5}, cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42389
// 3x2 scalar_imagic ukernel: n=3 (the only value in the n loop), k in {1, 3, 5}, m in 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42408
// 3x2 scalar_imagic ukernel: m=mr=3, n in {4, 6}, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size < 7; n_size += 2) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42424
// 3x2 scalar_imagic ukernel: m=mr=3, n in {4, 6}, k in {1, 3, 5}, cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size < 7; n_size += 2) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42441
// 3x2 scalar_imagic ukernel: n in {4, 6}, k in {1, 3, 5}, m in 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size < 7; n_size += 2) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42460
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, ks=3, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42475
// 3x2 scalar_imagic ukernel: ks=3, k in {1, 3, 5}, n in 1..2, m in 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t n_size = 1; n_size < 3; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42495
// 3x2 scalar_imagic ukernel: m=mr=3, ks=3, n=3 (the only value in the n loop), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42512
// 3x2 scalar_imagic ukernel: m=mr=3, ks=3, n in {4, 6}, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size < 7; n_size += 2) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42529
// 3x2 scalar_imagic ukernel: cm_stride(5), k in {1, 3, 5}, n in 1..2, m in 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t n_size = 1; n_size < 3; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42549
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, ks=3, a_offset=17, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_size)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42565
// 3x2 scalar_imagic ukernel: ks=3, a_offset=17, zero_index swept over {0, 1, 2}, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42584
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, k=1, with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42597
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, k=1, with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42610
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, k=1, with cm_stride(5) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42623
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, a_zero_point(0), for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42638
// 3x2 scalar_imagic ukernel: m=mr=3, n=nr=2, b_zero_point(0), for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42653
// 3x2 scalar_imagic ukernel: a_zero_point(0) and b_zero_point(0) together, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_zero_point) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42669
42670
// 3x4 scalar_imagic ukernel: single run with m=mr=3, n=nr=4, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42682
// 3x4 scalar_imagic ukernel: m=mr=3, n=nr=4, k=1, with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42695
// 3x4 scalar_imagic ukernel: k=1, every (n, m) with n in 1..4 and m in 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42712
// 3x4 scalar_imagic ukernel: k=1, n=nr=4, m swept over 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size < 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(m_size).n(4).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42727
// 3x4 scalar_imagic ukernel: k=1, m=mr=3, n swept over 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42742
// 3x4 scalar_imagic ukernel: m=mr=3, n=nr=4, k swept over 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42756
// 3x4 scalar_imagic ukernel: k in 2..9, n in 1..4, m in 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42775
// 3x4 scalar_imagic ukernel: m=mr=3, n in 5..7, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42791
// 3x4 scalar_imagic ukernel: m=mr=3, n in 5..7, k in {1, 3, 5}, cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42808
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over {1, 3, 5} and m over 1..3, one iteration each.
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
42827
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 3.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nn).k(kk)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
42843
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 3 and cn_stride = 7.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nn).k(kk)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
42860
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
  // Sweeps n over {8, 12}, k over {1, 3, 5} and m over 1..3, one iteration each.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
42879
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel) {
  // Sweeps k over {1, 3, 5} with m = 3, n = 4 and ks = 3.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kk)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
42894
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..3 with ks = 3, one iteration each.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
42914
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 3 and ks = 3.
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nn).k(kk)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
42931
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 3 and ks = 3.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nn).k(kk)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
42948
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..3 with cm_stride = 7, one iteration each.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
42968
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, a_offset) {
  // Sweeps k over {1, 3, 5} with m = 3, n = 4, ks = 3 and a_offset = 17.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kk)
        .ks(3)
        .a_offset(17)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
42984
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, zero) {
  // Sweeps k over {1, 3, 5} and zero_index over 0..2 with ks = 3 and a_offset = 17.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(4).k(kk)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43003
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
  // Single configuration: m = 3, n = 4, k = 1 with qmin set to 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43016
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
  // Single configuration: m = 3, n = 4, k = 1 with qmax set to 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43029
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
  // Single configuration: m = 3, n = 4, k = 1 with cm_stride set to 7.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43042
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_a_zero_point) {
  // Sweeps k over {1, 3, 5} with m = 3, n = 4 and a_zero_point = 0.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kk)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43057
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_b_zero_point) {
  // Sweeps k over {1, 3, 5} with m = 3, n = 4 and b_zero_point = 0.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kk)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43072
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_zero_point) {
  // Sweeps k over {1, 3, 5} with m = 3, n = 4 and both zero points set to 0.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kk)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43088
43089
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
  // Single configuration: m = 4, n = 2, k = 1.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43101
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
  // Single configuration: m = 4, n = 2, k = 1 with cn_stride set to 5.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43114
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  // Sweeps n over 1..2 and m over 1..4 with k = 1, one iteration each.
  for (uint32_t nn = 1; nn <= 2; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mm).n(nn).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43131
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  // Sweeps m over 1..4 with n = 2 and k = 1, one iteration each.
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mm).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43146
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  // Sweeps n over 1..2 with m = 4 and k = 1, one iteration each.
  for (uint32_t nn = 1; nn <= 2; ++nn) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nn).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43161
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
  // Configures m = 4, n = 2 and sweeps k over 2..9.
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kk)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43175
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..2 and m over 1..4, one iteration each.
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43194
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
  // The n loop covers only n = 3; k sweeps over {1, 3, 5} with m = 4.
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43210
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  // The n loop covers only n = 3; k sweeps over {1, 3, 5} with m = 4 and cn_stride = 5.
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43227
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  // The n loop covers only n = 3; sweeps k over {1, 3, 5} and m over 1..4, one iteration each.
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43246
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
  // Sweeps n over {4, 6} and k over {1, 3, 5} with m = 4.
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43262
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  // Sweeps n over {4, 6} and k over {1, 3, 5} with m = 4 and cn_stride = 5.
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43279
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
  // Sweeps n over {4, 6}, k over {1, 3, 5} and m over 1..4, one iteration each.
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43298
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel) {
  // Sweeps k over {1, 3, 5} with m = 4, n = 2 and ks = 3.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kk)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43313
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..2 and m over 1..4 with ks = 3, one iteration each.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43333
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  // The n loop covers only n = 3; k sweeps over {1, 3, 5} with m = 4 and ks = 3.
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43350
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  // Sweeps n over {4, 6} and k over {1, 3, 5} with m = 4 and ks = 3.
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43367
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..2 and m over 1..4 with cm_stride = 5, one iteration each.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43387
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, a_offset) {
  // Sweeps k over {1, 3, 5} with m = 4, n = 2, ks = 3 and a_offset = 23.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kk)
        .ks(3)
        .a_offset(23)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43403
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, zero) {
  // Sweeps k over {1, 3, 5} and zero_index over 0..3 with ks = 3 and a_offset = 23.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(kk)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43422
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
  // Single configuration: m = 4, n = 2, k = 1 with qmin set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43435
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
  // Single configuration: m = 4, n = 2, k = 1 with qmax set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43448
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
  // Single configuration: m = 4, n = 2, k = 1 with cm_stride set to 5.
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43461
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_a_zero_point) {
  // Sweeps k over {1, 3, 5} with m = 4, n = 2 and a_zero_point = 0.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kk)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43476
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_b_zero_point) {
  // Sweeps k over {1, 3, 5} with m = 4, n = 2 and b_zero_point = 0.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kk)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43491
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_zero_point) {
  // Sweeps k over {1, 3, 5} with m = 4, n = 2 and both zero points set to 0.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kk)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43507
43508
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
  // Single configuration: m = 4, n = 4, k = 1.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43520
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
  // Single configuration: m = 4, n = 4, k = 1 with cn_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
43533
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  // Sweeps n over 1..4 and m over 1..4 with k = 1, one iteration each.
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mm).n(nn).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43550
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  // Sweeps m over 1..4 with n = 4 and k = 1, one iteration each.
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(mm).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43565
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  // Sweeps n over 1..4 with m = 4 and k = 1, one iteration each.
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nn).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43580
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
  // Configures m = 4, n = 4 and sweeps k over 2..9.
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(kk)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43594
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..4 and m over 1..4, one iteration each.
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43613
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 4.
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43629
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 4 and cn_stride = 7.
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43646
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over {1, 3, 5} and m over 1..4, one iteration each.
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43665
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 4.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43681
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 4 and cn_stride = 7.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43698
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
  // Sweeps n over {8, 12}, k over {1, 3, 5} and m over 1..4, one iteration each.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43717
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel) {
  // Sweeps k over {1, 3, 5} with m = 4, n = 4 and ks = 3.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(kk)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43732
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..4 with ks = 3, one iteration each.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43752
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 4 and ks = 3.
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43769
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 4 and ks = 3.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(nn).k(kk)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43786
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..4 with cm_stride = 7, one iteration each.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
43806
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, a_offset) {
  // Sweeps k over {1, 3, 5} with m = 4, n = 4, ks = 3 and a_offset = 23.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(kk)
        .ks(3)
        .a_offset(23)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
43822
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, zero) {
  // Sweeps k over {1, 3, 5} and zero_index over 0..3 with ks = 3 and a_offset = 23.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(4).k(kk)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
43841
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
  // Full 4x4 tile with k = 1 and qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43854
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
  // Full 4x4 tile with k = 1 and qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43867
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
  // Full 4x4 tile with k = 1 and cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43880
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_a_zero_point) {
  // Full 4x4 tile for k in {1, 3, 5} with a_zero_point forced to 0.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43895
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_b_zero_point) {
  // Full 4x4 tile for k in {1, 3, 5} with b_zero_point forced to 0.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43910
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_zero_point) {
  // Full 4x4 tile for k in {1, 3, 5} with both a_zero_point and b_zero_point
  // forced to 0.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43926