1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qu8-igemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = 4, n = 16 (matching mr/nr) and k = 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
40
// Same shape as k_eq_16, but with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
54
// Sweeps n over 1..16 and m over 1..4 at k = 16, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
72
// Sweeps m over 1..4 with n = 16 and k = 16, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(m_dim).n(16).k(16)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
88
// Sweeps n over 1..16 with m = 4 and k = 16, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
104
// Sweeps k over 1..15 with m = 4 and n = 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
119
// Sweeps k over 1..15, n over 1..16 and m over 1..4, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
139
// Sweeps k over 17..31 with m = 4 and n = 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
154
// Sweeps k over 17..31, n over 1..16 and m over 1..4, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
174
// Sweeps k over 32..160 in steps of 16 with m = 4 and n = 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
189
// Sweeps k over 32..160 (step 16), n over 1..16 and m over 1..4, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
209
// Sweeps n over 17..31 and k over 1..80 (step 17) with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
226
// Sweeps n over 17..31 and k over 1..80 (step 17) with m = 4 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
244
// Sweeps n over 17..31, k over 1..80 (step 17) and m over 1..4, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
264
// Sweeps n over 32..48 (step 16) and k over 1..80 (step 17) with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
281
// Sweeps n over 32..48 (step 16) and k over 1..80 (step 17) with m = 4 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
299
// Sweeps n over 32..48 (step 16), k over 1..80 (step 17) and m over 1..4, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
319
// Sweeps k over 1..80 (step 17) with m = 4, n = 16 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
335
// Sweeps k over 1..80 (step 17), n over 1..16 and m over 1..4 with ks set to 3 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
356
// Sweeps n over 17..31 and k over 1..80 (step 17) with m = 4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
374
// Sweeps n over 32..48 (step 16) and k over 1..80 (step 17) with m = 4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
392
// Sweeps k over 1..80 (step 17), n over 1..16 and m over 1..4 with cm_stride set to 19 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
413
// Sweeps k over 1..80 (step 17) with m = 4, n = 16, ks set to 3 and a_offset set to 331.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .ks(3)
      .a_offset(331)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
430
// Sweeps k over 1..80 (step 17) and zero_index over 0..3 with ks set to 3 and a_offset set to 331.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k_dim)
        .ks(3)
        .a_offset(331)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
450
// Single run with m = 4, n = 16, k = 16 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
464
// Single run with m = 4, n = 16, k = 16 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
478
// Single run with m = 4, n = 16, k = 16 and cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
492
// Sweeps k over 1..80 (step 17) with m = 4, n = 16 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
508
// Sweeps k over 1..80 (step 17) with m = 4, n = 16 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
524
// Sweeps k over 1..80 (step 17) with m = 4, n = 16 and both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
541 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
542
543
544 #if XNN_ARCH_ARM
// Single run with m = 1, n = 2 (matching mr/nr) and k = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
557
// Same shape as k_eq_4, but with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .cn_stride(5)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
571
// Sweeps n over 1..2 and m over 1..1 at k = 4, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
589
// Sweeps m over 1..1 with n = 2 and k = 4, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(m_dim).n(2).k(4)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
605
// Sweeps n over 1..2 with m = 1 and k = 4, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(n_dim).k(4)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
621
// Sweeps k over 1..3 with m = 1 and n = 2.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
636
// Sweeps k over 1..3, n over 1..2 and m over 1..1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
656
// Sweeps k over 5..7 with m = 1 and n = 2.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
671
// Sweeps k over 5..7, n over 1..2 and m over 1..1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
691
// Sweeps k over 8..40 in steps of 4 with m = 1 and n = 2.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
706
// Sweeps k over 8..40 (step 4), n over 1..2 and m over 1..1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
726
// Runs n = 3 (loop 3..<4) across k over 1..20 (step 5) with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
743
// Runs n = 3 (loop 3..<4) across k over 1..20 (step 5) with m = 1 and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
761
// Runs n = 3 (loop 3..<4) across k over 1..20 (step 5) and m over 1..1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
781
// Sweeps n over 4..6 (step 2) and k over 1..20 (step 5) with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
798
// Sweeps n over 4..6 (step 2) and k over 1..20 (step 5) with m = 1 and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
816
// Sweeps n over 4..6 (step 2), k over 1..20 (step 5) and m over 1..1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
836
// Sweeps k over 1..20 (step 5) with m = 1, n = 2 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_dim)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
852
// Sweeps k over 1..20 (step 5), n over 1..2 and m over 1..1 with ks set to 3 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
873
// Runs n = 3 (loop 3..<4) across k over 1..20 (step 5) with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
891
// Sweeps n over 4..6 (step 2) and k over 1..20 (step 5) with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
909
// Sweeps k over 1..20 (step 5), n over 1..2 and m over 1..1 with cm_stride set to 5 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
930
// Sweeps k over 1..20 (step 5) with m = 1, n = 2, ks set to 3 and a_offset set to 23.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_dim)
      .ks(3)
      .a_offset(23)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
947
// Sweeps k over 1..20 (step 5) and zero_index over 0..0 with ks set to 3 and a_offset set to 23.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(4).sr(1)
        .m(1).n(2).k(k_dim)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
967
// Single run with m = 1, n = 2, k = 4 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
981
// Single run with m = 1, n = 2, k = 4 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
995
// Single run with m = 1, n = 2, k = 4 and cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(4).sr(1)
    .m(1).n(2).k(4)
    .cm_stride(5)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1009
// Sweeps k over 1..20 (step 5) with m = 1, n = 2 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1025
// Sweeps k over 1..20 (step 5) with m = 1, n = 2 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1041
// Sweeps k over 1..20 (step 5) with m = 1, n = 2 and both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(4).sr(1)
      .m(1).n(2).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1058 #endif // XNN_ARCH_ARM
1059
1060
1061 #if XNN_ARCH_ARM
// 2x2c4 ARMSIMD32: one full tile (m = mr, n = nr) with k = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(4)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1074
// 2x2c4 ARMSIMD32: one full tile with k = 4 and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(4)
      .cn_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1088
// 2x2c4 ARMSIMD32: k = 4, every (n, m) pair with n in 1..2 and m in 1..2,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1106
// 2x2c4 ARMSIMD32: k = 4 and n = nr, with m swept over 1..2; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(mc).n(2).k(4)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1122
// 2x2c4 ARMSIMD32: k = 4 and m = mr, with n swept over 1..2; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(nc).k(4)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1138
// 2x2c4 ARMSIMD32: full tile with k swept over 1..3 (every value below 4).
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1153
// 2x2c4 ARMSIMD32: k in 1..3 crossed with n in 1..2 and m in 1..2;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1173
// 2x2c4 ARMSIMD32: full tile with k swept over 5..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1188
// 2x2c4 ARMSIMD32: k in 5..7 crossed with n in 1..2 and m in 1..2;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1208
// 2x2c4 ARMSIMD32: full tile with k stepping through multiples of 4 from 8 to 40.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1223
// 2x2c4 ARMSIMD32: k in {8, 12, ..., 40} crossed with n in 1..2 and m in 1..2;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1243
// 2x2c4 ARMSIMD32: n above nr (the range 3 <= n < 4 yields n = 3 only),
// m = mr, k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1260
// 2x2c4 ARMSIMD32: n = 3 (single value of the 3 <= n < 4 range), m = mr,
// k in {1, 6, 11, 16}, with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1278
// 2x2c4 ARMSIMD32: n = 3 (single value of the 3 <= n < 4 range),
// k in {1, 6, 11, 16}, m in 1..2; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1298
// 2x2c4 ARMSIMD32: n in {4, 6} (multiples of nr), m = mr, k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1315
// 2x2c4 ARMSIMD32: n in {4, 6}, m = mr, k in {1, 6, 11, 16}, cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1333
// 2x2c4 ARMSIMD32: n in {4, 6}, k in {1, 6, 11, 16}, m in 1..2;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1353
// 2x2c4 ARMSIMD32: full tile with ks = 3, for k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1369
// 2x2c4 ARMSIMD32: ks = 3 with k in {1, 6, 11, 16}, n in 1..2 and m in 1..2;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1390
// 2x2c4 ARMSIMD32: ks = 3 with n = 3 (single value of the 3 <= n < 4 range),
// m = mr, k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1408
// 2x2c4 ARMSIMD32: ks = 3 with n in {4, 6}, m = mr, k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1426
// 2x2c4 ARMSIMD32: cm_stride = 5 with k in {1, 6, 11, 16}, n in 1..2 and
// m in 1..2; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                  xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1447
// 2x2c4 ARMSIMD32: full tile with ks = 3 and a_offset = 43, for k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1464
// 2x2c4 ARMSIMD32: full tile with ks = 3 and a_offset = 43; zero_index takes
// each value in 0..1 for every k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(4).sr(1)
          .m(2).n(2).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_row)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
                xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1484
// 2x2c4 ARMSIMD32: one full tile with k = 4 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(4)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1498
// 2x2c4 ARMSIMD32: one full tile with k = 4 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(4)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1512
// 2x2c4 ARMSIMD32: one full tile with k = 4 and cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(4).sr(1)
      .m(2).n(2).k(4)
      .cm_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
}
1526
// 2x2c4 ARMSIMD32: full tile with a_zero_point forced to 0, for k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1542
// 2x2c4 ARMSIMD32: full tile with b_zero_point forced to 0, for k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1558
// 2x2c4 ARMSIMD32: full tile with both a_zero_point and b_zero_point set to 0,
// for k in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(4).sr(1)
        .m(2).n(2).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32,
              xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
              xnn_qu8_requantize_fp32);
  }
}
1575 #endif // XNN_ARCH_ARM
1576
1577
1578 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8 NEON MLAL-lane: one full tile (m = mr, n = nr) with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
}
1591
// 1x8 NEON MLAL-lane: one full tile with k = 8 and cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
}
1605
// 1x8 NEON MLAL-lane: k = 8 with n in 1..8 and m in 1..1 (one value);
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1623
// 1x8 NEON MLAL-lane: k = 8 and n = nr, with m over 1..1 (one value);
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
1639
// 1x8 NEON MLAL-lane: k = 8 and m = mr, with n swept over 1..8; iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
1655
// 1x8 NEON MLAL-lane: full tile with k swept over 1..7 (every value below 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
1670
// 1x8 NEON MLAL-lane: k in 1..7 crossed with n in 1..8 and m in 1..1;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_fp32_neon_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1690
// 1x8 NEON MLAL-lane: full tile with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
1705
// 1x8 NEON MLAL-lane: k in 9..15 crossed with n in 1..8 and m in 1..1;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_fp32_neon_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1725
// 1x8 NEON MLAL-lane: full tile with k stepping through multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
1740
// 1x8 NEON MLAL-lane: k in {16, 24, ..., 80} crossed with n in 1..8 and
// m in 1..1; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_fp32_neon_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1760
// 1x8 NEON MLAL-lane: n in 9..15 (above nr), m = mr, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1777
// 1x8 NEON MLAL-lane: n in 9..15, m = mr, k in {1, 10, 19, 28, 37},
// with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1795
// 1x8 NEON MLAL-lane: n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..1;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_fp32_neon_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1815
// 1x8 NEON MLAL-lane: n in {16, 24} (multiples of nr), m = mr,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1832
// 1x8 NEON MLAL-lane: n in {16, 24}, m = mr, k in {1, 10, 19, 28, 37},
// with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1850
// 1x8 NEON MLAL-lane: n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..1;
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_fp32_neon_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1870
// 1x8 NEON MLAL-lane: full tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
1886
// 1x8 NEON MLAL-lane: ks = 3 with k in {1, 10, 19, 28, 37}, n in 1..8 and
// m in 1..1; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_fp32_neon_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1907
// 1x8 NEON MLAL-lane: ks = 3 with n in 9..15, m = mr, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1925
// 1x8 NEON MLAL-lane: ks = 3 with n in {16, 24}, m = mr, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
1943
// 1x8 NEON MLAL-lane: cm_stride = 11 with k in {1, 10, 19, 28, 37}, n in 1..8
// and m in 1..1; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_fp32_neon_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
1964
// 1x8 NEON MLAL-lane: full tile with ks = 3 and a_offset = 43,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
1981
// 1x8 NEON MLAL-lane: full tile with ks = 3 and a_offset = 43; zero_index
// covers 0 <= index < 1 (only 0) for every k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_row)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
2001
// 1x8 NEON MLAL-lane: one full tile with k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
}
2015
// 1x8 NEON MLAL-lane: one full tile with k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
}
2029
// 1x8 NEON MLAL-lane: one full tile with k = 8 and cm_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
}
2043
// 1x8 NEON MLAL-lane: full tile with a_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
2059
// 1x8 NEON MLAL-lane: full tile with b_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
2075
// 1x8 NEON MLAL-lane: full tile with both a_zero_point and b_zero_point set
// to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
  }
}
2092 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2093
2094
2095 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x16 NEON MLAL-lane: one full tile (m = mr, n = nr) with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
}
2108
// 1x16 NEON MLAL-lane: one full tile with k = 8 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
}
2122
// 1x16 NEON MLAL-lane: k = 8 with n in 1..16 and m in 1..1 (one value);
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
2140
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and k = 8 are fixed; m ranges over 1..1.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2156
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m = 1 and k = 8 are fixed; n ranges over 1..16.
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2172
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // k ranges over 1..7 with m = 1 and n = 16.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2187
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in 1..7, n in 1..16, m in 1..1; one iteration per combination.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2207
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // k ranges over 9..15 with m = 1 and n = 16.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2222
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in 9..15, n in 1..16, m in 1..1; one iteration per combination.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2242
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // k takes the multiples of 8 from 16 through 80.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2257
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k over multiples of 8 in 16..80, n in 1..16, m in 1..1; one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2277
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // n in 17..31; for each n, k takes the values 1, 10, 19, 28, 37.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2294
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in 17..31, k in {1, 10, 19, 28, 37}, with cn_stride set to 19.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2312
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in 17..31, k in {1, 10, 19, 28, 37}, m in 1..1; one iteration each.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2332
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 32 and 48; for each n, k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2349
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in {32, 48}, k in {1, 10, 19, 28, 37}, with cn_stride set to 19.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2367
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {32, 48}, k in {1, 10, 19, 28, 37}, m in 1..1; one iteration each.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2387
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks is set to 3; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2403
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3; k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..1; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2424
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3; n in 17..31, k in {1, 10, 19, 28, 37}.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2442
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3; n in {32, 48}, k in {1, 10, 19, 28, 37}.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2460
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride = 19; k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..1; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2481
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 and a_offset = 43; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2498
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3, a_offset = 43; zero_index ranges over 0..0 for each k in {1, 10, 19, 28, 37}.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2518
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m = 1, n = 16, k = 8 and qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
}
2532
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m = 1, n = 16, k = 8 and qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
}
2546
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m = 1, n = 16, k = 8 and cm_stride set to 19.
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
}
2560
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  // a_zero_point is set to 0; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2576
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  // b_zero_point is set to 0; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2592
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  // Both zero points are set to 0; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .a_zero_point(0).b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2609 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2610
2611
2612 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single run with m = 2, n = 16, k = 8.
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
2625
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single run with m = 2, n = 16, k = 8 and cn_stride set to 19.
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
2639
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k is fixed at 8; every (n, m) pair with n in 1..16 and m in 1..2 is run once.
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2657
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n = 16 and k = 8 are fixed; m ranges over 1..2.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2673
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 2 and k = 8 are fixed; n ranges over 1..16.
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2689
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k ranges over 1..7 with m = 2 and n = 16.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2704
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k in 1..7, n in 1..16, m in 1..2; one iteration per combination.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2724
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k ranges over 9..15 with m = 2 and n = 16.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2739
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k in 9..15, n in 1..16, m in 1..2; one iteration per combination.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2759
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k takes the multiples of 8 from 16 through 80.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2774
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k over multiples of 8 in 16..80, n in 1..16, m in 1..2; one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2794
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in 17..31; for each n, k takes the values 1, 10, 19, 28, 37.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2811
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in 17..31, k in {1, 10, 19, 28, 37}, with cn_stride set to 19.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2829
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in 17..31, k in {1, 10, 19, 28, 37}, m in 1..2; one iteration each.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2849
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n takes the values 32 and 48; for each n, k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2866
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in {32, 48}, k in {1, 10, 19, 28, 37}, with cn_stride set to 19.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2884
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in {32, 48}, k in {1, 10, 19, 28, 37}, m in 1..2; one iteration each.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2904
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks is set to 3; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2920
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3; k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..2; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2941
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3; n in 17..31, k in {1, 10, 19, 28, 37}.
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2959
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3; n in {32, 48}, k in {1, 10, 19, 28, 37}.
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2977
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // cm_stride = 19; k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..2; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2998
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3 and a_offset = 83; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3015
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3, a_offset = 83; zero_index ranges over 0..1 for each k in {1, 10, 19, 28, 37}.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(16).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3035
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single run with m = 2, n = 16, k = 8 and qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
3049
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single run with m = 2, n = 16, k = 8 and qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
3063
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single run with m = 2, n = 16, k = 8 and cm_stride set to 19.
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
3077
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // a_zero_point is set to 0; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3093
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // b_zero_point is set to 0; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3109
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Both zero points are set to 0; k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(kc)
      .a_zero_point(0).b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3126 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
3127
3128
3129 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m = 4, n = 8, k = 8.
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
}
3142
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with m = 4, n = 8, k = 8 and cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
}
3156
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k is fixed at 8; every (n, m) pair with n in 1..8 and m in 1..4 is run once.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3174
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n = 8 and k = 8 are fixed; m ranges over 1..4.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
3190
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 and k = 8 are fixed; n ranges over 1..8.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
3206
// 4x8 NEON MLAL-lane kernel: m=4, n=8, sweeping k over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3221
// 4x8 NEON MLAL-lane kernel: sweeping k over 1..7, n over 1..8 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3241
// 4x8 NEON MLAL-lane kernel: m=4, n=8, sweeping k over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3256
// 4x8 NEON MLAL-lane kernel: sweeping k over 9..15, n over 1..8 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3276
// 4x8 NEON MLAL-lane kernel: m=4, n=8, sweeping k over 16..80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3291
// 4x8 NEON MLAL-lane kernel: sweeping k over 16..80 (step 8), n over 1..8 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3311
// 4x8 NEON MLAL-lane kernel: m=4, sweeping n over 9..15 and k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3328
// 4x8 NEON MLAL-lane kernel: m=4, cn_stride=11, sweeping n over 9..15 and k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3346
// 4x8 NEON MLAL-lane kernel: sweeping n over 9..15, k over 1..40 (step 9) and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3366
// 4x8 NEON MLAL-lane kernel: m=4, n in {16, 24}, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3383
// 4x8 NEON MLAL-lane kernel: m=4, cn_stride=11, n in {16, 24}, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3401
// 4x8 NEON MLAL-lane kernel: n in {16, 24}, sweeping k over 1..40 (step 9) and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3421
// 4x8 NEON MLAL-lane kernel: m=4, n=8, ks=3, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3437
// 4x8 NEON MLAL-lane kernel: ks=3, sweeping k over 1..40 (step 9), n over 1..8 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3458
// 4x8 NEON MLAL-lane kernel: m=4, ks=3, sweeping n over 9..15 and k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3476
// 4x8 NEON MLAL-lane kernel: m=4, ks=3, n in {16, 24}, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3494
// 4x8 NEON MLAL-lane kernel: cm_stride=11, sweeping k over 1..40 (step 9), n over 1..8 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3515
// 4x8 NEON MLAL-lane kernel: m=4, n=8, ks=3, a_offset=163, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3532
// 4x8 NEON MLAL-lane kernel: m=4, n=8, ks=3, a_offset=163, sweeping k over 1..40 (step 9) and zero_index over 0..3.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3552
// 4x8 NEON MLAL-lane kernel: m=4, n=8, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3566
// 4x8 NEON MLAL-lane kernel: m=4, n=8, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3580
// 4x8 NEON MLAL-lane kernel: m=4, n=8, k=8 with cm_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3594
// 4x8 NEON MLAL-lane kernel: m=4, n=8, a_zero_point=0, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3610
// 4x8 NEON MLAL-lane kernel: m=4, n=8, b_zero_point=0, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3626
// 4x8 NEON MLAL-lane kernel: m=4, n=8, a_zero_point=0 and b_zero_point=0, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3643 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3644
3645
3646 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// 4x16c4 NEON dot-product kernel: m=4, n=16, k=8 with default tester settings.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3659
// 4x16c4 NEON dot-product kernel: m=4, n=16, k=8 with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3673
// 4x16c4 NEON dot-product kernel: k=8, sweeping n over 1..16 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3691
// 4x16c4 NEON dot-product kernel: n=16, k=8, sweeping m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(m_size).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3707
// 4x16c4 NEON dot-product kernel: m=4, k=8, sweeping n over 1..16, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3723
// 4x16c4 NEON dot-product kernel: m=4, n=16, sweeping k over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3738
// 4x16c4 NEON dot-product kernel: sweeping k over 1..7, n over 1..16 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3758
// 4x16c4 NEON dot-product kernel: m=4, n=16, sweeping k over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3773
// 4x16c4 NEON dot-product kernel: sweeping k over 9..15, n over 1..16 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3793
// 4x16c4 NEON dot-product kernel: m=4, n=16, sweeping k over 16..80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3808
// 4x16c4 NEON dot-product kernel: sweeping k over 16..80 (step 8), n over 1..16 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3828
// 4x16c4 NEON dot-product kernel: m=4, sweeping n over 17..31 and k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3845
// 4x16c4 NEON dot-product kernel: m=4, cn_stride=19, sweeping n over 17..31 and k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3863
// 4x16c4 NEON dot-product kernel: sweeping n over 17..31, k over 1..40 (step 9) and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3883
// 4x16c4 NEON dot-product kernel: m=4, n in {32, 48}, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3900
// 4x16c4 NEON dot-product kernel: m=4, cn_stride=19, n in {32, 48}, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3918
// 4x16c4 NEON dot-product kernel: n in {32, 48}, sweeping k over 1..40 (step 9) and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3938
// 4x16c4 NEON dot-product kernel: m=4, n=16, ks=3, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3954
// 4x16c4 NEON dot-product kernel: ks=3, sweeping k over 1..40 (step 9), n over 1..16 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3975
// 4x16c4 NEON dot-product kernel: m=4, ks=3, sweeping n over 17..31 and k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3993
// 4x16c4 NEON dot-product kernel: m=4, ks=3, n in {32, 48}, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
4011
// 4x16c4 NEON dot-product kernel: cm_stride=19, sweeping k over 1..40 (step 9), n over 1..16 and m over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4032
// 4x16c4 NEON dot-product kernel: m=4, n=16, ks=3, a_offset=163, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
4049
// 4x16c4 NEON dot-product kernel: m=4, n=16, ks=3, a_offset=163, sweeping k over 1..40 (step 9) and zero_index over 0..3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(16).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
4069
// 4x16c4 NEON dot-product kernel: m=4, n=16, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
4083
// 4x16c4 NEON dot-product kernel: m=4, n=16, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
4097
// 4x16c4 NEON dot-product kernel: m=4, n=16, k=8 with cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
4111
// 4x16c4 NEON dot-product kernel: m=4, n=16, a_zero_point=0, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
4127
// 4x16c4 NEON dot-product kernel: m=4, n=16, b_zero_point=0, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
4143
// 4x16c4 NEON dot-product kernel: m=4, n=16, a_zero_point=0 and b_zero_point=0, sweeping k over 1..40 in steps of 9.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
4160 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
4161
4162
4163 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 SSE2 ld64 kernel: m=1, n=4, k=8 with default tester settings.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4176
// 1x4c2 SSE2 ld64 kernel: m=1, n=4, k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4190
// 1x4c2 SSE2 ld64 kernel: k=8, sweeping n over 1..4 and m over 1..1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4208
// 1x4c2 SSE2 ld64 kernel: n=4, k=8, sweeping m over 1..1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4224
// 1x4c2 SSE2 ld64 kernel: m=1, k=8, sweeping n over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4240
// 1x4c2 SSE2 ld64 kernel: m=1, n=4, sweeping k over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4255
// 1x4c2 SSE2 ld64 kernel: sweeping k over 1..7, n over 1..4 and m over 1..1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4275
// 1x4c2 SSE2 ld64 kernel: m=1, n=4, sweeping k over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4290
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4310
// Sweeps k over 16, 24, ..., 80 (step 8) with m=1 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4325
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4345
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4362
// Same sweep as n in [5, 7] x k in {1, 10, 19, 28, 37}, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4380
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4400
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4417
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4435
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4455
// Sweeps k over {1, 10, 19, 28, 37} with m=1, n=4 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4471
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 1] with ks=3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4492
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=1 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4510
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=1 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4528
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 1] with cm_stride=7, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4549
// Sweeps k over {1, 10, 19, 28, 37} with m=1, n=4, ks=3 and a_offset=43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4566
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over [0, 0] with ks=3 and a_offset=43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4586
// Single run at m=1, n=4, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
4600
// Single run at m=1, n=4, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
4614
// Single run at m=1, n=4, k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
4628
// Sweeps k over {1, 10, 19, 28, 37} with m=1, n=4 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4644
// Sweeps k over {1, 10, 19, 28, 37} with m=1, n=4 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4660
// Sweeps k over {1, 10, 19, 28, 37} with m=1, n=4 and both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4677 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4678
4679
4680 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 2x4c2 SSE2 kernel at m=2, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
4693
// Single run at m=2, n=4, k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
4707
// Sweeps n over [1, 4] and m over [1, 2] at k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4725
// Sweeps m over [1, 2] with n=4 and k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4741
// Sweeps n over [1, 4] with m=2 and k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4757
// Sweeps k over [1, 7] with m=2 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4772
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4792
// Sweeps k over [9, 15] with m=2 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4807
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4827
// Sweeps k over 16, 24, ..., 80 (step 8) with m=2 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4842
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4862
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4879
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=2 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4897
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4917
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4934
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=2 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
4952
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
4972
// Sweeps k over {1, 10, 19, 28, 37} with m=2, n=4 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
4988
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 2] with ks=3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5009
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=2 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5027
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=2 and ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5045
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 2] with cm_stride=7, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5066
// Sweeps k over {1, 10, 19, 28, 37} with m=2, n=4, ks=3 and a_offset=83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5083
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over [0, 1] with ks=3 and a_offset=83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5103
// Single run at m=2, n=4, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5117
// Single run at m=2, n=4, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5131
// Single run at m=2, n=4, k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5145
// Sweeps k over {1, 10, 19, 28, 37} with m=2, n=4 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5161
// Sweeps k over {1, 10, 19, 28, 37} with m=2, n=4 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5177
// Sweeps k over {1, 10, 19, 28, 37} with m=2, n=4 and both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5194 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5195
5196
5197 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 3x4c2 SSE4.1 kernel at m=3, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5210
// Single run at m=3, n=4, k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5224
// Sweeps n over [1, 4] and m over [1, 3] at k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5242
// Sweeps m over [1, 3] with n=4 and k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5258
// Sweeps n over [1, 4] with m=3 and k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5274
// Sweeps k over [1, 7] with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5289
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5309
// Sweeps k over [9, 15] with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5324
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5344
// Sweeps k over 16, 24, ..., 80 (step 8) with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5359
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 3], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5379
// m = 3 with n in [5, 7], for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5396
// m = 3 with n in [5, 7] and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5414
// n in [5, 7], k = 1, 10, 19, 28, 37, and m in [1, 3];
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5434
// m = 3 with n = 8 and n = 12, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5451
// m = 3 with n = 8 and n = 12 and cn_stride set to 7,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5469
// n = 8 and n = 12, k = 1, 10, 19, 28, 37, and m in [1, 3];
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5489
// Full 3x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5505
// ks set to 3; every (m, n) with m in [1, 3], n in [1, 4],
// for k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5526
// m = 3 with n in [5, 7] and ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5544
// m = 3 with n = 8 and n = 12 and ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5562
// cm_stride set to 7; every (m, n) with m in [1, 3], n in [1, 4],
// for k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5583
// Full 3x4 tile with ks set to 3 and a_offset set to 127,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5600
// Full 3x4 tile with ks set to 3 and a_offset set to 127, sweeping
// zero_index over 0..2 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5620
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5634
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5648
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5662
// Full 3x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5678
// Full 3x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5694
// Full 3x4 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5711 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5712
5713
5714 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5727
// Full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5741
// k = 8 with every (m, n) for n in [1, 4] and m in [1, 4];
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5759
// k = 8, n = 4, with m in [1, 4]; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5775
// k = 8, m = 4, with n in [1, 4]; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5791
// Full 4x4 tile, one run per k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5806
// Every (m, n) with m in [1, 4], n in [1, 4], for each k in [1, 7];
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5826
// Full 4x4 tile, one run per k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5841
// Every (m, n) with m in [1, 4], n in [1, 4], for each k in [9, 15];
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5861
// Full 4x4 tile, with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5876
// Every (m, n) with m in [1, 4], n in [1, 4], for k = 16, 24, ..., 80;
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5896
// m = 4 with n in [5, 7], for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5913
// m = 4 with n in [5, 7] and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5931
// n in [5, 7], k = 1, 10, 19, 28, 37, and m in [1, 4];
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5951
// m = 4 with n = 8 and n = 12, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5968
// m = 4 with n = 8 and n = 12 and cn_stride set to 7,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5986
// n = 8 and n = 12, k = 1, 10, 19, 28, 37, and m in [1, 4];
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6006
// Full 4x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6022
// ks set to 3; every (m, n) with m in [1, 4], n in [1, 4],
// for k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6043
// m = 4 with n in [5, 7] and ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6061
// m = 4 with n = 8 and n = 12 and ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6079
// cm_stride set to 7; every (m, n) with m in [1, 4], n in [1, 4],
// for k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6100
// Full 4x4 tile with ks set to 3 and a_offset set to 163,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6117
// Full 4x4 tile with ks set to 3 and a_offset set to 163, sweeping
// zero_index over 0..3 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6137
// Full 4x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6151
// Full 4x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6165
// Full 4x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6179
// Full 4x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6195
// Full 4x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6211
// Full 4x4 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6228 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6229
6230
6231 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6244
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6258
// k = 8 with n in [1, 4] and m over the single value 1;
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6276
// k = 8, n = 4, with m over the single value 1; one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6292
// k = 8, m = 1, with n in [1, 4]; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6308
// Full 1x4 tile, one run per k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6323
// k in [1, 7], n in [1, 4], and m over the single value 1;
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6343
// Full 1x4 tile, one run per k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6358
// k in [9, 15], n in [1, 4], and m over the single value 1;
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6378
// Full 1x4 tile, with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6393
// k = 16, 24, ..., 80, n in [1, 4], and m over the single value 1;
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6413
// m = 1 with n in [5, 7], for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6430
// m = 1 with n in [5, 7] and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6448
// n in [5, 7], k = 1, 10, 19, 28, 37, and m over the single value 1;
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6468
// m = 1 with n = 8 and n = 12, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6485
// avx_ld64 1x4c2: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6503
// avx_ld64 1x4c2: n in {8, 12}, k in {1, 10, 19, 28, 37}, every m up to mr (only 1),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6523
// avx_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6539
// avx_ld64 1x4c2: k in {1, 10, 19, 28, 37}, n = 1..4, every m up to mr (only 1),
// with ks(3) and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6560
// avx_ld64 1x4c2: n = 5..7, k in {1, 10, 19, 28, 37}, m = 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6578
// avx_ld64 1x4c2: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6596
// avx_ld64 1x4c2: k in {1, 10, 19, 28, 37}, n = 1..4, every m up to mr (only 1),
// with cm_stride(7) and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6617
// avx_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6634
// avx_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with ks(3), a_offset(43)
// and zero_index swept over [0, mr) (only 0 here).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(depth);
      tester.ks(3);
      tester.a_offset(43);
      tester.zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6654
// avx_ld64 1x4c2: m = 1, n = 4, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6668
// avx_ld64 1x4c2: m = 1, n = 4, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6682
// avx_ld64 1x4c2: m = 1, n = 4, k = 8, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6696
// avx_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6712
// avx_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6728
// avx_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6745 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6746
6747
6748 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// xop_ld64 1x4c2: single case with m = 1, n = 4 (matching mr/nr) and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6761
// xop_ld64 1x4c2: m = 1, n = 4, k = 8, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6775
// xop_ld64 1x4c2: k = 8, n = 1..4, every m up to mr (only 1), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6793
// xop_ld64 1x4c2: k = 8, n = 4, every m up to mr (only 1), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6809
// xop_ld64 1x4c2: k = 8, m = 1, n = 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6825
// xop_ld64 1x4c2: m = 1, n = 4, k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6840
// xop_ld64 1x4c2: k = 1..7, n = 1..4, every m up to mr (only 1), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6860
// xop_ld64 1x4c2: m = 1, n = 4, k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6875
// xop_ld64 1x4c2: k = 9..15, n = 1..4, every m up to mr (only 1), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6895
// xop_ld64 1x4c2: m = 1, n = 4, k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6910
// xop_ld64 1x4c2: k = 16, 24, ..., 80, n = 1..4, every m up to mr (only 1),
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6930
// xop_ld64 1x4c2: n = 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6947
// xop_ld64 1x4c2: n = 5..7, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6965
// xop_ld64 1x4c2: n = 5..7, k in {1, 10, 19, 28, 37}, every m up to mr (only 1),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6985
// xop_ld64 1x4c2: n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7002
// xop_ld64 1x4c2: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7020
// xop_ld64 1x4c2: n in {8, 12}, k in {1, 10, 19, 28, 37}, every m up to mr (only 1),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7040
// xop_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7056
// xop_ld64 1x4c2: k in {1, 10, 19, 28, 37}, n = 1..4, every m up to mr (only 1),
// with ks(3) and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7077
// xop_ld64 1x4c2: n = 5..7, k in {1, 10, 19, 28, 37}, m = 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7095
// xop_ld64 1x4c2: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7113
// xop_ld64 1x4c2: k in {1, 10, 19, 28, 37}, n = 1..4, every m up to mr (only 1),
// with cm_stride(7) and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7134
// xop_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7151
// xop_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with ks(3), a_offset(43)
// and zero_index swept over [0, mr) (only 0 here).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(depth);
      tester.ks(3);
      tester.a_offset(43);
      tester.zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7171
// xop_ld64 1x4c2: m = 1, n = 4, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7185
// xop_ld64 1x4c2: m = 1, n = 4, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7199
// xop_ld64 1x4c2: m = 1, n = 4, k = 8, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7213
// xop_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7229
// xop_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7245
// xop_ld64 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7262 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7263
7264
7265 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// sse2_ld128 1x4c2: single case with m = 1, n = 4 (matching mr/nr) and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7278
// sse2_ld128 1x4c2: m = 1, n = 4, k = 8, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7292
// sse2_ld128 1x4c2: k = 8, n = 1..4, every m up to mr (only 1), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7310
// sse2_ld128 1x4c2: k = 8, n = 4, every m up to mr (only 1), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7326
// sse2_ld128 1x4c2: k = 8, m = 1, n = 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7342
// sse2_ld128 1x4c2: m = 1, n = 4, k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7357
// sse2_ld128 1x4c2: k = 1..7, n = 1..4, every m up to mr (only 1), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7377
// sse2_ld128 1x4c2: m = 1, n = 4, k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7392
// sse2_ld128 1x4c2: k = 9..15, n = 1..4, every m up to mr (only 1), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7412
// sse2_ld128 1x4c2: m = 1, n = 4, k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7427
// sse2_ld128 1x4c2: k = 16, 24, ..., 80, n = 1..4, every m up to mr (only 1),
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7447
// sse2_ld128 1x4c2: n = 5..7 (above nr = 4), k in {1, 10, 19, 28, 37}, m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7464
// sse2_ld128 1x4c2: n = 5..7, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7482
// sse2_ld128 1x4c2: n = 5..7, k in {1, 10, 19, 28, 37}, every m up to mr (only 1),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7502
// sse2_ld128 1x4c2: n in {8, 12} (multiples of nr = 4), k in {1, 10, 19, 28, 37}, m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7519
// sse2_ld128 1x4c2: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7537
// sse2_ld128 1x4c2: n in {8, 12}, k in {1, 10, 19, 28, 37}, every m up to mr (only 1),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7557
// sse2_ld128 1x4c2: m = 1, n = 4, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7573
// small_kernel_subtile: k in {1, 10, 19, 28, 37}, n in 1..4, m over 1..1, with ks = 3
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7594
// n_gt_4_small_kernel: n in 5..7, k in {1, 10, 19, 28, 37}, m = 1, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7612
// n_div_4_small_kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7630
// strided_cm_subtile: k in {1, 10, 19, 28, 37}, n in 1..4, m over 1..1, with cm_stride = 7
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7651
// a_offset: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with ks = 3 and a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7668
// zero: k in {1, 10, 19, 28, 37} at m = 1, n = 4, ks = 3, a_offset = 43; zero_index takes
// every value below 1 (i.e. only 0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(cur_k)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7688
// qmin: single run at m = 1, n = 4, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7702
// qmax: single run at m = 1, n = 4, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7716
// strided_cm: single run at m = 1, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7730
// no_a_zero_point: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7746
// no_b_zero_point: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7762
// no_zero_point: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7779 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7780
7781
7782 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run at m = 1, n = 4, k = 8; no other tester options are set.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7795
// strided_cn: single run at m = 1, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7809
// k_eq_8_subtile: k = 8 with n in 1..4 and m over 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7827
// k_eq_8_subtile_m: k = 8, n = 4, m over 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7843
// k_eq_8_subtile_n: k = 8, m = 1, n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7859
// k_lt_8: every k in 1..7 at m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7874
// k_lt_8_subtile: k in 1..7, n in 1..4, m over 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7894
// k_gt_8: every k in 9..15 at m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7909
// k_gt_8_subtile: k in 9..15, n in 1..4, m over 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7929
// k_div_8: k from 16 to 80 in steps of 8, at m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7944
// k_div_8_subtile: k from 16 to 80 in steps of 8, n in 1..4, m over 1..1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7964
// n_gt_4: n in 5..7 and k in {1, 10, 19, 28, 37}, with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7981
// n_gt_4_strided_cn: n in 5..7, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7999
// n_gt_4_subtile: n in 5..7, k in {1, 10, 19, 28, 37}, m over 1..1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8019
// n_div_4: n in {8, 12} and k in {1, 10, 19, 28, 37}, with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8036
// n_div_4_strided_cn: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8054
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m over 1..1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8074
// small_kernel: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8090
// small_kernel_subtile: k in {1, 10, 19, 28, 37}, n in 1..4, m over 1..1, with ks = 3
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8111
// n_gt_4_small_kernel: n in 5..7, k in {1, 10, 19, 28, 37}, m = 1, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8129
// n_div_4_small_kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8147
// strided_cm_subtile: k in {1, 10, 19, 28, 37}, n in 1..4, m over 1..1, with cm_stride = 7
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8168
// a_offset: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with ks = 3 and a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8185
// zero: k in {1, 10, 19, 28, 37} at m = 1, n = 4, ks = 3, a_offset = 43; zero_index takes
// every value below 1 (i.e. only 0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(cur_k)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8205
// qmin: single run at m = 1, n = 4, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8219
// qmax: single run at m = 1, n = 4, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8233
// strided_cm: single run at m = 1, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8247
// no_a_zero_point: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8263
// no_b_zero_point: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8279
// no_zero_point: k in {1, 10, 19, 28, 37} at m = 1, n = 4, with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(cur_k)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8297
8298
8299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run at m = 2, n = 4, k = 8; no other tester options are set.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8312
// strided_cn: single run at m = 2, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8326
// k_eq_8_subtile: k = 8 with n in 1..4 and m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8344
// k_eq_8_subtile_m: k = 8, n = 4, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8360
// k_eq_8_subtile_n: k = 8, m = 2, n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8376
// k_lt_8: every k in 1..7 at m = 2, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(cur_k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8391
// k_lt_8_subtile: k in 1..7, n in 1..4, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8411
// k_gt_8: every k in 9..15 at m = 2, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(cur_k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8426
// k_gt_8_subtile: k in 9..15, n in 1..4, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8446
// k_div_8: k from 16 to 80 in steps of 8, at m = 2, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(cur_k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8461
// k_div_8_subtile: k from 16 to 80 in steps of 8, n in 1..4, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8481
// n_gt_4: n in 5..7 and k in {1, 10, 19, 28, 37}, with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8498
// n_gt_4_strided_cn: n in 5..7, k in {1, 10, 19, 28, 37}, m = 2, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8516
// n_gt_4_subtile: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8536
// n_div_4: n in {8, 12} and k in {1, 10, 19, 28, 37}, with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8553
// n_div_4_strided_cn: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 2, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8571
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8591
// small_kernel: k in {1, 10, 19, 28, 37} at m = 2, n = 4, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(cur_k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8607
// small_kernel_subtile: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2, with ks = 3
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8628
// n_gt_4_small_kernel: n in 5..7, k in {1, 10, 19, 28, 37}, m = 2, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8646
// n_div_4_small_kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 2, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8664
// strided_cm_subtile: cm_stride(7) with every (m, n) in 1..2 x 1..4, for
// k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
8685
// a_offset: ks(3) and a_offset(83) on m = 2, n = 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8702
// zero: ks(3), a_offset(83) and zero_index = 0, 1, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_row)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
8722
// qmin: m = 2, n = 4, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
8736
// qmax: m = 2, n = 4, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
8750
// strided_cm: m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
8764
// no_a_zero_point: a_zero_point(0) on m = 2, n = 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8780
// no_b_zero_point: b_zero_point(0) on m = 2, n = 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8796
// no_zero_point: a_zero_point(0) and b_zero_point(0) on m = 2, n = 4,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8813 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8814
8815
8816 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: m = 2, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
8829
// strided_cn: m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
8843
// k_eq_8_subtile: k = 8 with every (m, n) in 1..2 x 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
8861
// k_eq_8_subtile_m: k = 8, n = 4, with m = 1, 2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8877
// k_eq_8_subtile_n: k = 8, m = 2, with n = 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8893
// k_lt_8: m = 2, n = 4, for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8908
// k_lt_8_subtile: every k in 1..7 with every (m, n) in 1..2 x 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
8928
// k_gt_8: m = 2, n = 4, for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8943
// k_gt_8_subtile: every k in 9..15 with every (m, n) in 1..2 x 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
8963
// k_div_8: m = 2, n = 4, for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8978
// k_div_8_subtile: k = 16, 24, ..., 80 with every (m, n) in 1..2 x 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
8998
// n_gt_4: m = 2 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9015
// n_gt_4_strided_cn: cn_stride(7) with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9033
// n_gt_4_subtile: n = 5, 6, 7, k = 1, 10, 19, 28, 37 and m = 1, 2;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9053
// n_div_4: m = 2 with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9070
// n_div_4_strided_cn: cn_stride(7) with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9088
// n_div_4_subtile: n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1, 2;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9108
// small_kernel: m = 2, n = 4 with ks(3), sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9124
// small_kernel_subtile: ks(3) with every (m, n) in 1..2 x 1..4, for
// k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9145
// n_gt_4_small_kernel: ks(3) with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9163
// n_div_4_small_kernel: ks(3) with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9181
// strided_cm_subtile: cm_stride(7) with every (m, n) in 1..2 x 1..4, for
// k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9202
// a_offset: ks(3) and a_offset(83) on m = 2, n = 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9219
// zero: ks(3), a_offset(83) and zero_index = 0, 1, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_row)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9239
// qmin: m = 2, n = 4, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9253
// qmax: m = 2, n = 4, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9267
// strided_cm: m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9281
// no_a_zero_point: a_zero_point(0) on m = 2, n = 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9297
// no_b_zero_point: b_zero_point(0) on m = 2, n = 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9313
// no_zero_point: a_zero_point(0) and b_zero_point(0) on m = 2, n = 4,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9330 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9331
9332
9333 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9346
// strided_cn: m = 4, n = 4, k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9360
// k_eq_8_subtile: k = 8 with every (m, n) in 1..4 x 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9378
// k_eq_8_subtile_m: k = 8, n = 4, with m = 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9394
// k_eq_8_subtile_n: k = 8, m = 4, with n = 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9410
// k_lt_8: m = 4, n = 4, for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9425
// k_lt_8_subtile: every k in 1..7 with every (m, n) in 1..4 x 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9445
// k_gt_8: m = 4, n = 4, for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9460
// k_gt_8_subtile: every k in 9..15 with every (m, n) in 1..4 x 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9480
// k_div_8: m = 4, n = 4, for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9495
// k_div_8_subtile: k = 16, 24, ..., 80 with every (m, n) in 1..4 x 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9515
// n_gt_4: m = 4 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9532
// n_gt_4_strided_cn: cn_stride(7) with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9550
// n_gt_4_subtile: n = 5, 6, 7, k = 1, 10, 19, 28, 37 and m = 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9570
// n_div_4: m = 4 with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9587
// n_div_4_strided_cn: cn_stride(7) with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9605
// n_div_4_subtile: n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9625
// small_kernel: m = 4, n = 4 with ks(3), sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9641
// small_kernel_subtile: ks(3) with every (m, n) in 1..4 x 1..4, for
// k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9662
// n_gt_4_small_kernel: ks(3) with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9680
// n_div_4_small_kernel: ks(3) with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9698
// strided_cm_subtile: cm_stride(7) with every (m, n) in 1..4 x 1..4, for
// k = 1, 10, 19, 28, 37; a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9719
// a_offset: ks(3) and a_offset(163) on m = 4, n = 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9736
// zero: ks(3), a_offset(163) and zero_index = 0..3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9756
// 4x4c2 SSE2 ld128: full 4x4 tile at k == 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9770
// 4x4c2 SSE2 ld128: full 4x4 tile at k == 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9784
// 4x4c2 SSE2 ld128: full 4x4 tile at k == 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9798
// 4x4c2 SSE2 ld128: full 4x4 tile with a_zero_point(0),
// k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9814
// 4x4c2 SSE2 ld128: full 4x4 tile with b_zero_point(0),
// k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9830
// 4x4c2 SSE2 ld128: full 4x4 tile with both a_zero_point(0) and
// b_zero_point(0), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9847 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9848
9849
9850 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 AVX ld128: single full tile (m == mr == 1, n == nr == 4) at k == 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9863
// 1x4c2 AVX ld128: full 1x4 tile at k == 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
9877
// 1x4c2 AVX ld128: k == 8 for every n in [1, 4] and m in [1, 1]
// (the m loop runs once because mr is 1); one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
9895
// 1x4c2 AVX ld128: k == 8, n == 4, m iterating over [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9911
// 1x4c2 AVX ld128: k == 8, m == 1, n iterating over [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9927
// 1x4c2 AVX ld128: full 1x4 tile for every k in [1, 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9942
// 1x4c2 AVX ld128: every k in [1, 8) combined with n in [1, 4] and
// m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9962
// 1x4c2 AVX ld128: full 1x4 tile for every k in [9, 16).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9977
// 1x4c2 AVX ld128: every k in [9, 16) combined with n in [1, 4] and
// m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
9997
// 1x4c2 AVX ld128: full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10012
// 1x4c2 AVX ld128: k = 16, 24, ..., 80 combined with n in [1, 4] and
// m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10032
// 1x4c2 AVX ld128: n in [5, 8) (above nr == 4) with k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10049
// 1x4c2 AVX ld128: n in [5, 8) with cn_stride(7), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10067
// 1x4c2 AVX ld128: n in [5, 8), k sweeping 1, 10, 19, 28, 37, and
// m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10087
// 1x4c2 AVX ld128: n = 8 and 12 (multiples of nr) with k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10104
// 1x4c2 AVX ld128: n = 8 and 12 with cn_stride(7), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10122
// 1x4c2 AVX ld128: n = 8 and 12, k sweeping 1, 10, 19, 28, 37, and
// m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10142
// 1x4c2 AVX ld128: full 1x4 tile with ks(3), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10158
// 1x4c2 AVX ld128: ks(3) with k sweeping 1, 10, 19, 28, 37, n in [1, 4]
// and m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10179
// 1x4c2 AVX ld128: n in [5, 8) with ks(3), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10197
// 1x4c2 AVX ld128: n = 8 and 12 with ks(3), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10215
// 1x4c2 AVX ld128: cm_stride(7) with k sweeping 1, 10, 19, 28, 37,
// n in [1, 4] and m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10236
// 1x4c2 AVX ld128: full 1x4 tile with ks(3) and a_offset(43),
// k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10253
// 1x4c2 AVX ld128: ks(3) and a_offset(43) with zero_index over [0, 1)
// (a single value, 0), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10273
// 1x4c2 AVX ld128: full 1x4 tile at k == 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
10287
// 1x4c2 AVX ld128: full 1x4 tile at k == 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
10301
// 1x4c2 AVX ld128: full 1x4 tile at k == 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
10315
// 1x4c2 AVX ld128: full 1x4 tile with a_zero_point(0),
// k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10331
// 1x4c2 AVX ld128: full 1x4 tile with b_zero_point(0),
// k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10347
// 1x4c2 AVX ld128: full 1x4 tile with both a_zero_point(0) and
// b_zero_point(0), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10364 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10365
10366
10367 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 AVX ld128: single full tile (m == mr == 2, n == nr == 4) at k == 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
10380
// 2x4c2 AVX ld128: full 2x4 tile at k == 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
10394
// 2x4c2 AVX ld128: k == 8 for every n in [1, 4] and m in [1, 2];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10412
// 2x4c2 AVX ld128: k == 8, n == 4, m iterating over [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10428
// 2x4c2 AVX ld128: k == 8, m == 2, n iterating over [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10444
// 2x4c2 AVX ld128: full 2x4 tile for every k in [1, 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10459
// 2x4c2 AVX ld128: every k in [1, 8) combined with n in [1, 4] and
// m in [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10479
// 2x4c2 AVX ld128: full 2x4 tile for every k in [9, 16).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10494
// 2x4c2 AVX ld128: every k in [9, 16) combined with n in [1, 4] and
// m in [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10514
// 2x4c2 AVX ld128: full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10529
// 2x4c2 AVX ld128: k = 16, 24, ..., 80 combined with n in [1, 4] and
// m in [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10549
// 2x4c2 AVX ld128: n in [5, 8) (above nr == 4) with k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10566
// 2x4c2 AVX ld128: n in [5, 8) with cn_stride(7), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10584
// 2x4c2 AVX ld128: n in [5, 8), k sweeping 1, 10, 19, 28, 37, and
// m in [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10604
// 2x4c2 AVX ld128: n = 8 and 12 (multiples of nr) with k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10621
// 2x4c2 AVX ld128: n = 8 and 12 with cn_stride(7), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10639
// 2x4c2 AVX ld128: n = 8 and 12, k sweeping 1, 10, 19, 28, 37, and
// m in [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10659
// 2x4c2 AVX ld128: full 2x4 tile with ks(3), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10675
// 2x4c2 AVX ld128: ks(3) with k sweeping 1, 10, 19, 28, 37, n in [1, 4]
// and m in [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10696
// 2x4c2 AVX ld128: n in [5, 8) with ks(3), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10714
// 2x4c2 AVX ld128: n = 8 and 12 with ks(3), k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10732
// 2x4c2 AVX ld128: cm_stride(7) with k sweeping 1, 10, 19, 28, 37,
// n in [1, 4] and m in [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
10753
// 2x4c2 AVX ld128: full 2x4 tile with ks(3) and a_offset(83),
// k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10770
// 2x4c2 AVX ld128: ks(3) and a_offset(83) with every zero_index in [0, 2),
// k sweeping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
10790
// 2x4c2 AVX ld128: full 2x4 tile at k == 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
10804
// 2x4c2 AVX ld128: full 2x4 tile at k == 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
10818
// Single run at m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10832
// Sweeps k over 1, 10, 19, 28, 37 at m = 2, n = 4 with a_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10848
// Sweeps k over 1, 10, 19, 28, 37 at m = 2, n = 4 with b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10864
// Sweeps k over 1, 10, 19, 28, 37 at m = 2, n = 4 with both a_zero_point and
// b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10881 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10882
10883
10884 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 3, n = 4, k = 8; all other tester settings stay at their defaults.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10897
// Single run at m = 3, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10911
// Covers every (n, m) pair with n in 1..4 and m in 1..3 at k = 8,
// using a single tester iteration per pair.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10929
// Sweeps m over 1..3 at n = 4, k = 8, using a single tester iteration per m.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10945
// Sweeps n over 1..4 at m = 3, k = 8, using a single tester iteration per n.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10961
// Sweeps k over 1..7 at m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10976
// Covers every (k, n, m) triple with k in 1..7, n in 1..4 and m in 1..3,
// using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10996
// Sweeps k over 9..15 at m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11011
// Covers every (k, n, m) triple with k in 9..15, n in 1..4 and m in 1..3,
// using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11031
// Sweeps k over the multiples of 8 from 16 to 80 at m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11046
// Covers every (k, n, m) triple with k a multiple of 8 in 16..80, n in 1..4
// and m in 1..3, using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11066
// Covers n in 5..7 combined with k in 1, 10, 19, 28, 37 at m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11083
// Covers n in 5..7 combined with k in 1, 10, 19, 28, 37 at m = 3,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11101
// Covers every (n, k, m) triple with n in 5..7, k in 1, 10, 19, 28, 37 and
// m in 1..3, using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11121
// Covers n in {8, 12} combined with k in 1, 10, 19, 28, 37 at m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11138
// Covers n in {8, 12} combined with k in 1, 10, 19, 28, 37 at m = 3,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11156
// Covers every (n, k, m) triple with n in {8, 12}, k in 1, 10, 19, 28, 37 and
// m in 1..3, using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11176
// Sweeps k over 1, 10, 19, 28, 37 at m = 3, n = 4 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11192
// Covers every (k, n, m) triple with k in 1, 10, 19, 28, 37, n in 1..4 and
// m in 1..3, with ks = 3 and a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11213
// Covers n in 5..7 combined with k in 1, 10, 19, 28, 37 at m = 3, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11231
// Covers n in {8, 12} combined with k in 1, 10, 19, 28, 37 at m = 3, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11249
// Covers every (k, n, m) triple with k in 1, 10, 19, 28, 37, n in 1..4 and
// m in 1..3, with cm_stride = 7 and a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11270
// Sweeps k over 1, 10, 19, 28, 37 at m = 3, n = 4 with ks = 3 and a_offset = 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11287
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..2, with ks = 3 and
// a_offset = 127 at m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11307
// Single run at m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11321
// Single run at m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11335
// Single run at m = 3, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11349
// Sweeps k over 1, 10, 19, 28, 37 at m = 3, n = 4 with a_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11365
// Sweeps k over 1, 10, 19, 28, 37 at m = 3, n = 4 with b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11381
// Sweeps k over 1, 10, 19, 28, 37 at m = 3, n = 4 with both a_zero_point and
// b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11398 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11399
11400
11401 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 4, n = 4, k = 8; all other tester settings stay at their defaults.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11414
// Single run at m = 4, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11428
// Covers every (n, m) pair with n in 1..4 and m in 1..4 at k = 8,
// using a single tester iteration per pair.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester{};
      tester
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11446
// Sweeps m over 1..4 at n = 4, k = 8, using a single tester iteration per m.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester{};
    tester
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11462
// Sweeps n over 1..4 at m = 4, k = 8, using a single tester iteration per n.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester{};
    tester
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11478
// Sweeps k over 1..7 at m = 4, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11493
// Covers every (k, n, m) triple with k in 1..7, n in 1..4 and m in 1..4,
// using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11513
// Sweeps k over 9..15 at m = 4, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11528
// Covers every (k, n, m) triple with k in 9..15, n in 1..4 and m in 1..4,
// using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11548
// Sweeps k over the multiples of 8 from 16 to 80 at m = 4, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11563
// Covers every (k, n, m) triple with k a multiple of 8 in 16..80, n in 1..4
// and m in 1..4, using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11583
// Covers n in 5..7 combined with k in 1, 10, 19, 28, 37 at m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11600
// Covers n in 5..7 combined with k in 1, 10, 19, 28, 37 at m = 4,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11618
// Covers every (n, k, m) triple with n in 5..7, k in 1, 10, 19, 28, 37 and
// m in 1..4, using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11638
// Covers n in {8, 12} combined with k in 1, 10, 19, 28, 37 at m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11655
// Covers n in {8, 12} combined with k in 1, 10, 19, 28, 37 at m = 4,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11673
// Covers every (n, k, m) triple with n in {8, 12}, k in 1, 10, 19, 28, 37 and
// m in 1..4, using a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11693
// Sweeps k over 1, 10, 19, 28, 37 at m = 4, n = 4 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11709
// Covers every (k, n, m) triple with k in 1, 10, 19, 28, 37, n in 1..4 and
// m in 1..4, with ks = 3 and a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11730
// Covers n in 5..7 combined with k in 1, 10, 19, 28, 37 at m = 4, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11748
// Covers n in {8, 12} combined with k in 1, 10, 19, 28, 37 at m = 4, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11766
// Covers every (k, n, m) triple with k in 1, 10, 19, 28, 37, n in 1..4 and
// m in 1..4, with cm_stride = 7 and a single tester iteration per triple.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester{};
        tester
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11787
// Sweeps k over 1, 10, 19, 28, 37 at m = 4, n = 4 with ks = 3 and a_offset = 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11804
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..3, with ks = 3 and
// a_offset = 163 at m = 4, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11824
// Single run at m = 4, n = 4, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11838
// Single run at m = 4, n = 4, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11852
// Single run at m = 4, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11866
// Sweeps k over 1, 10, 19, 28, 37 at m = 4, n = 4 with a_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11882
// no_b_zero_point: for k in {1, 10, 19, 28, 37}, runs the 4x4c2 AVX kernel
// with m=4, n=4 and b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11898
// no_zero_point: for k in {1, 10, 19, 28, 37}, runs the 4x4c2 AVX kernel with
// m=4, n=4 and both a_zero_point and b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11915 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11916
11917
11918 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: runs the 1x4c2s4 SSE2 kernel with m=1, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11931
// strided_cn: runs the 1x4c2s4 SSE2 kernel with m=1, n=4, k=8 and cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11945
// k_eq_8_subtile: k=8, every n in [1, 4] and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11963
// k_eq_8_subtile_m: k=8, n=4, every m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11979
// k_eq_8_subtile_n: k=8, m=1, every n in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11995
// k_lt_8: m=1, n=4, every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12010
// k_lt_8_subtile: every k in [1, 7], n in [1, 4] and m in [1, 1], one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12030
// k_gt_8: m=1, n=4, every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12045
// k_gt_8_subtile: every k in [9, 15], n in [1, 4] and m in [1, 1], one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12065
// k_div_8: m=1, n=4, k from 16 to 80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12080
// k_div_8_subtile: k from 16 to 80 in steps of 8, every n in [1, 4] and m in
// [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12100
// n_gt_4: m=1, every n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12117
// n_gt_4_strided_cn: m=1, cn_stride=7, every n in [5, 7] crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12135
// n_gt_4_subtile: every n in [5, 7], k in {1, 10, 19, 28, 37} and m in
// [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12155
// n_div_4: m=1, n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12172
// n_div_4_strided_cn: m=1, cn_stride=7, n in {8, 12} crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12190
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 1],
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12210
// small_kernel: m=1, n=4, ks=3, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12226
// small_kernel_subtile: ks=3, k in {1, 10, 19, 28, 37}, every n in [1, 4] and
// m in [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12247
// n_gt_4_small_kernel: m=1, ks=3, every n in [5, 7] crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12265
// n_div_4_small_kernel: m=1, ks=3, n in {8, 12} crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12283
// strided_cm_subtile: cm_stride=7, k in {1, 10, 19, 28, 37}, every n in
// [1, 4] and m in [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12304
// a_offset: m=1, n=4, ks=3, a_offset=43, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12321
// zero: m=1, n=4, ks=3, a_offset=43, k in {1, 10, 19, 28, 37} crossed with
// each zero_index in [0, 1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12341
// qmin: runs the 1x4c2s4 SSE2 kernel with m=1, n=4, k=8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
12355
// qmax: runs the 1x4c2s4 SSE2 kernel with m=1, n=4, k=8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
12369
// strided_cm: runs the 1x4c2s4 SSE2 kernel with m=1, n=4, k=8 and cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
12383
// no_a_zero_point: m=1, n=4, a_zero_point forced to 0, k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12399
// no_b_zero_point: m=1, n=4, b_zero_point forced to 0, k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12415
// no_zero_point: m=1, n=4, both a_zero_point and b_zero_point forced to 0,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12432 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12433
12434
12435 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: runs the 1x4c2s4 SSE4.1 kernel with m=1, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
12448
// strided_cn: runs the 1x4c2s4 SSE4.1 kernel with m=1, n=4, k=8 and
// cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
12462
// k_eq_8_subtile: k=8, every n in [1, 4] and m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12480
// k_eq_8_subtile_m: k=8, n=4, every m in [1, 1], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12496
// k_eq_8_subtile_n: k=8, m=1, every n in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12512
// k_lt_8: m=1, n=4, every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12527
// k_lt_8_subtile: every k in [1, 7], n in [1, 4] and m in [1, 1], one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12547
// k_gt_8: m=1, n=4, every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12562
// k_gt_8_subtile: every k in [9, 15], n in [1, 4] and m in [1, 1], one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12582
// k_div_8: m=1, n=4, k from 16 to 80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12597
// k_div_8_subtile: k from 16 to 80 in steps of 8, every n in [1, 4] and m in
// [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12617
// n_gt_4: m=1, every n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12634
// n_gt_4_strided_cn: m=1, cn_stride=7, every n in [5, 7] crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12652
// n_gt_4_subtile: every n in [5, 7], k in {1, 10, 19, 28, 37} and m in
// [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12672
// n_div_4: m=1, n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12689
// n_div_4_strided_cn: m=1, cn_stride=7, n in {8, 12} crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12707
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 1],
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12727
// small_kernel: m=1, n=4, ks=3, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12743
// small_kernel_subtile: ks=3, k in {1, 10, 19, 28, 37}, every n in [1, 4] and
// m in [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12764
// n_gt_4_small_kernel: m=1, ks=3, every n in [5, 7] crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12782
// n_div_4_small_kernel: m=1, ks=3, n in {8, 12} crossed with k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12800
// strided_cm_subtile: cm_stride=7, k in {1, 10, 19, 28, 37}, every n in
// [1, 4] and m in [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
12821
// a_offset: m=1, n=4, ks=3, a_offset=43, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12838
// zero: m=1, n=4, ks=3, a_offset=43, k in {1, 10, 19, 28, 37} crossed with
// each zero_index in [0, 1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
12858
// qmin: runs the 1x4c2s4 SSE4.1 kernel with m=1, n=4, k=8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
12872
// qmax: runs the 1x4c2s4 SSE4.1 kernel with m=1, n=4, k=8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
12886
// strided_cm: runs the 1x4c2s4 SSE4.1 kernel with m=1, n=4, k=8 and
// cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
12900
// no_a_zero_point: m=1, n=4, a_zero_point forced to 0, k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
12916
// Sweeps k over {1, 10, 19, 28, 37} on the full 1x4 tile with b_zero_point
// set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12932
// Sweeps k over {1, 10, 19, 28, 37} on the full 1x4 tile with both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12949 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12950
12951
12952 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full 2x4 tile with k = 8 and no other tester overrides.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(2).n(4).k(8)           // problem size
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12965
// Single run on the full 2x4 tile (k = 8) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(2).n(4).k(8)           // problem size
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12979
// With k fixed at 8, runs every (n, m) pair for n in [1, 4] and m in [1, 2],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12997
// With k = 8 and n = 4, varies m over [1, 2] using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13013
// With k = 8 and m = 2, varies n over [1, 4] using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13029
// Runs the full 2x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13044
// For every k in [1, 7], runs each (n, m) pair with n in [1, 4] and
// m in [1, 2], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13064
// Runs the full 2x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13079
// For every k in [9, 15], runs each (n, m) pair with n in [1, 4] and
// m in [1, 2], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13099
// Runs the full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13114
// For k = 16, 24, ..., 80, runs each (n, m) pair with n in [1, 4] and
// m in [1, 2], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13134
// For n in [5, 7] (above nr = 4), sweeps k over {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13151
// For n in [5, 7], sweeps k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13169
// For n in [5, 7] and k in {1, 10, 19, 28, 37}, varies m over [1, 2] using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13189
// For n in {8, 12} (multiples of nr = 4), sweeps k over {1, 10, 19, 28, 37}
// with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13206
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13224
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, varies m over [1, 2] using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13244
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13260
// For k in {1, 10, 19, 28, 37}, runs each (n, m) pair with n in [1, 4] and
// m in [1, 2], with ks set to 3 and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13281
// For n in [5, 7], sweeps k over {1, 10, 19, 28, 37} with m = 2 and ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13299
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 2 and ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13317
// For k in {1, 10, 19, 28, 37}, runs each (n, m) pair with n in [1, 4] and
// m in [1, 2], with cm_stride set to 7 and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13338
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks set to 3
// and a_offset set to 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13355
// For k in {1, 10, 19, 28, 37}, runs the full 2x4 tile with ks = 3 and
// a_offset = 83 while zero_index takes each value in [0, 1].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t mz_val = 0; mz_val < 2; ++mz_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(mz_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13375
// Single run on the full 2x4 tile (k = 8) with the tester's qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(2).n(4).k(8)           // problem size
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13389
// Single run on the full 2x4 tile (k = 8) with the tester's qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(2).n(4).k(8)           // problem size
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13403
// Single run on the full 2x4 tile (k = 8) with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(2).n(4).k(8)           // problem size
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13417
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with a_zero_point
// set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13433
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with b_zero_point
// set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13449
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13466 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13467
13468
13469 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full 4x4 tile with k = 8 and no other tester overrides.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(4).n(4).k(8)           // problem size
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13482
// Single run on the full 4x4 tile (k = 8) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(4).n(4).k(8)           // problem size
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13496
// With k fixed at 8, runs every (n, m) pair for n in [1, 4] and m in [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13514
// With k = 8 and n = 4, varies m over [1, 4] using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13530
// With k = 8 and m = 4, varies n over [1, 4] using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13546
// Runs the full 4x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13561
// For every k in [1, 7], runs each (n, m) pair with n in [1, 4] and
// m in [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13581
// Runs the full 4x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13596
// For every k in [9, 15], runs each (n, m) pair with n in [1, 4] and
// m in [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13616
// Runs the full 4x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13631
// For k = 16, 24, ..., 80, runs each (n, m) pair with n in [1, 4] and
// m in [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13651
// For n in [5, 7] (above nr = 4), sweeps k over {1, 10, 19, 28, 37} with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13668
// For n in [5, 7], sweeps k over {1, 10, 19, 28, 37} with m = 4 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13686
// For n in [5, 7] and k in {1, 10, 19, 28, 37}, varies m over [1, 4] using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13706
// For n in {8, 12} (multiples of nr = 4), sweeps k over {1, 10, 19, 28, 37}
// with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13723
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 4 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13741
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, varies m over [1, 4] using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13761
// Sweeps k over {1, 10, 19, 28, 37} on the full 4x4 tile with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13777
// For k in {1, 10, 19, 28, 37}, runs each (n, m) pair with n in [1, 4] and
// m in [1, 4], with ks set to 3 and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13798
// For n in [5, 7], sweeps k over {1, 10, 19, 28, 37} with m = 4 and ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13816
// For n in {8, 12}, sweeps k over {1, 10, 19, 28, 37} with m = 4 and ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13834
// For k in {1, 10, 19, 28, 37}, runs each (n, m) pair with n in [1, 4] and
// m in [1, 4], with cm_stride set to 7 and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13855
// Sweeps k over {1, 10, 19, 28, 37} on the full 4x4 tile with ks set to 3
// and a_offset set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13872
// For k in {1, 10, 19, 28, 37}, runs the full 4x4 tile with ks = 3 and
// a_offset = 163 while zero_index takes each value in [0, 3].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t mz_val = 0; mz_val < 4; ++mz_val) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .zero_index(mz_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13892
// Single run on the full 4x4 tile (k = 8) with the tester's qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(4).n(4).k(8)           // problem size
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13906
// Single run on the full 4x4 tile (k = 8) with the tester's qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(4).n(4).k(8)           // problem size
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13920
// Single run on the full 4x4 tile (k = 8) with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)  // microkernel tile parameters
    .m(4).n(4).k(8)           // problem size
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13934
// Sweeps k over {1, 10, 19, 28, 37} on the full 4x4 tile with a_zero_point
// set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13950
// Sweeps k over {1, 10, 19, 28, 37} on the full 4x4 tile with b_zero_point
// set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_val)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13966
// Full 4x4 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
13983 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13984
13985
13986 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8; no other tester options are set.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
13999
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14013
// k = 8 for every (m, n) with n in [1, 4] (outer) and m in [1, 3] (inner),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14031
// k = 8, n = 4, with m swept over [1, 3]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14047
// k = 8, m = 3, with n swept over [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14063
// Full 3x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14078
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 3]; one iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14098
// Full 3x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14113
// Every k in [9, 15] crossed with n in [1, 4] and m in [1, 3]; one iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14133
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14148
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 3]; one iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14168
// m = 3 with n in [5, 7], each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14185
// m = 3 with n in [5, 7] and cn_stride set to 7, each for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14203
// n in [5, 7] crossed with k = 1, 10, 19, 28, 37 and m in [1, 3]; one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14223
// m = 3 with n = 8 and n = 12, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14240
// m = 3 with n = 8 and n = 12 and cn_stride set to 7, each for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14258
// n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 and m in [1, 3]; one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14278
// Full 3x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14294
// ks set to 3; k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and m in [1, 3];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14315
// m = 3 with n in [5, 7] and ks set to 3, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14333
// m = 3 with n = 8 and n = 12 and ks set to 3, each for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14351
// cm_stride set to 7; k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
// m in [1, 3]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14372
// Full 3x4 tile with ks set to 3 and a_offset set to 127, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14389
// Full 3x4 tile with ks set to 3 and a_offset set to 127; for each of
// k = 1, 10, 19, 28, 37, zero_index is swept over 0, 1, 2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14409
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14423
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14437
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14451
// Full 3x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14467
// Full 3x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14483
// Full 3x4 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14500 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14501
14502
14503 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8; no other tester options are set.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14516
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14530
// k = 8 for every (m, n) with n in [1, 4] (outer) and m in [1, 3] (inner),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14548
// k = 8, n = 4, with m swept over [1, 3]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14564
// k = 8, m = 3, with n swept over [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14580
// Full 3x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14595
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 3]; one iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14615
// Full 3x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14630
// Every k in [9, 15] crossed with n in [1, 4] and m in [1, 3]; one iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14650
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14665
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 3]; one iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14685
// m = 3 with n in [5, 7], each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14702
// m = 3 with n in [5, 7] and cn_stride set to 7, each for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14720
// n in [5, 7] crossed with k = 1, 10, 19, 28, 37 and m in [1, 3]; one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14740
// m = 3 with n = 8 and n = 12, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14757
// m = 3 with n = 8 and n = 12 and cn_stride set to 7, each for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14775
// n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 and m in [1, 3]; one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14795
// Full 3x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14811
// ks set to 3; k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and m in [1, 3];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14832
// m = 3 with n in [5, 7] and ks set to 3, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14850
// m = 3 with n = 8 and n = 12 and ks set to 3, each for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14868
// cm_stride set to 7; k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
// m in [1, 3]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14889
// Full 3x4 tile with ks set to 3 and a_offset set to 127, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14906
// Full 3x4 tile with ks set to 3 and a_offset set to 127; for each of
// k = 1, 10, 19, 28, 37, zero_index is swept over 0, 1, 2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14926
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14940
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14954
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14968
// Full 3x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14984
// Full 3x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
15000
// Full 3x4 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
15017 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15018
15019
15020 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15033
// Full 1x4 tile at k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15047
// Every (m, n) sub-tile with n in [1, 4] and m in [1, 1] at k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15065
// Sweeps m over [1, 1] with n=4 and k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15081
// Sweeps n over [1, 4] with m=1 and k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(nc).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15097
// Full 1x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15112
// Every (m, n) sub-tile (n in [1, 4], m in [1, 1]) for each k in [1, 7],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15132
// Full 1x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15147
// Every (m, n) sub-tile (n in [1, 4], m in [1, 1]) for each k in [9, 15],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15167
// Full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15182
// Every (m, n) sub-tile (n in [1, 4], m in [1, 1]) for k = 16, 24, ..., 80,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15202
// n in [5, 7] (greater than nr=4) with m=1, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15219
// n in [5, 7] with m=1 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15237
// n in [5, 7], k = 1, 10, 19, 28, 37, m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15257
// n = 8 and 12 (multiples of nr=4) with m=1, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15274
// n = 8 and 12 with m=1 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15292
// n = 8 and 12, k = 1, 10, 19, 28, 37, m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15312
// Full 1x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15328
// ks=3 over every (m, n) sub-tile (n in [1, 4], m in [1, 1]) for
// k = 1, 10, 19, 28, 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15349
// ks=3 with n in [5, 7] and m=1, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15367
// ks=3 with n = 8 and 12 and m=1, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15385
// cm_stride=7 over every (m, n) sub-tile (n in [1, 4], m in [1, 1]) for
// k = 1, 10, 19, 28, 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15406
// Full 1x4 tile with ks=3 and a_offset=43, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15423
// ks=3, a_offset=43 and zero_index swept over [0, 0], for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(4).k(kc);
      tester.ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15443
// Full 1x4 tile at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15457
// Full 1x4 tile at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15471
// Full 1x4 tile at k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15485
// Full 1x4 tile with a_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15501
// Full 1x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15517
// Full 1x4 tile with both a_zero_point and b_zero_point forced to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15534 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15535
15536
15537 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile at k=8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15550
// Full 2x4 tile at k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15564
// Every (m, n) sub-tile with n in [1, 4] and m in [1, 2] at k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15582
// Sweeps m over [1, 2] with n=4 and k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15598
// Sweeps n over [1, 4] with m=2 and k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(nc).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15614
// Full 2x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15629
// Every (m, n) sub-tile (n in [1, 4], m in [1, 2]) for each k in [1, 7],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15649
// Full 2x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15664
// Every (m, n) sub-tile (n in [1, 4], m in [1, 2]) for each k in [9, 15],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15684
// Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15699
// Every (m, n) sub-tile (n in [1, 4], m in [1, 2]) for k = 16, 24, ..., 80,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15719
// n in [5, 7] (greater than nr=4) with m=2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15736
// n in [5, 7] with m=2 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15754
// n in [5, 7], k = 1, 10, 19, 28, 37, m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15774
// n = 8 and 12 (multiples of nr=4) with m=2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15791
// n = 8 and 12 with m=2 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15809
// n = 8 and 12, k = 1, 10, 19, 28, 37, m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15829
// Full 2x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15845
// ks=3 over every (m, n) sub-tile (n in [1, 4], m in [1, 2]) for
// k = 1, 10, 19, 28, 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15866
// ks=3 with n in [5, 7] and m=2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15884
// ks=3 with n = 8 and 12 and m=2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15902
// cm_stride=7 over every (m, n) sub-tile (n in [1, 4], m in [1, 2]) for
// k = 1, 10, 19, 28, 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15923
// Full 2x4 tile with ks=3 and a_offset=83, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15940
// ks=3, a_offset=83 and zero_index swept over [0, 1], for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15960
// Full 2x4 tile at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15974
// Full 2x4 tile at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15988
// Full 2x4 tile at k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16002
// Full 2x4 tile with a_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16018
// Full 2x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16034
// Full 2x4 tile with both a_zero_point and b_zero_point forced to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16051 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16052
16053
16054 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: one full 2x4 tile with k = 8; every other tester setting keeps its default.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16067
// Full 2x4 tile at k = 8 with cn_stride(7), a stride value larger than nr = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16081
// Every (m, n) shape with m in [1, 2] and n in [1, 4] at k = 8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16099
// Varies m over [1, 2] while n stays 4 and k stays 8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16115
// Varies n over [1, 4] while m stays 2 and k stays 8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16131
// Full 2x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16146
// Every (m, n) shape with m in [1, 2], n in [1, 4] for each k in [1, 7]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16166
// Full 2x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16181
// Every (m, n) shape with m in [1, 2], n in [1, 4] for each k in [9, 15]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16201
// Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16216
// Every (m, n) shape with m in [1, 2], n in [1, 4] for k = 16, 24, ..., 80; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16236
// n in [5, 7] (above nr = 4) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16253
// n in [5, 7] with m = 2 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16271
// n in [5, 7] crossed with m in [1, 2], for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16291
// n = 8 and 12 (multiples of nr = 4) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16308
// n = 8 and 12 with m = 2 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16326
// n = 8 and 12 crossed with m in [1, 2], for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16346
// Full 2x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16362
// ks(3) with every (m, n) shape, m in [1, 2] and n in [1, 4], for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16383
// ks(3) with n in [5, 7] and m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16401
// ks(3) with n = 8 and 12 and m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16419
// cm_stride(7) with every (m, n) shape, m in [1, 2] and n in [1, 4], for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16440
// Full 2x4 tile with ks(3) and a_offset(83), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16457
// Same setup as the a_offset case, additionally sweeping zero_index over each row index in [0, 2).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16477
// Full 2x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16491
// Full 2x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16505
// Full 2x4 tile at k = 8 with cm_stride(7), a stride value larger than n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16519
// Full 2x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16535
// Full 2x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16551
// Full 2x4 tile with both a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16568 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16569
16570
16571 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: one full 3x4 tile with k = 8; every other tester setting keeps its default.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16584
// Full 3x4 tile at k = 8 with cn_stride(7), a stride value larger than nr = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16598
// Every (m, n) shape with m in [1, 3] and n in [1, 4] at k = 8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16616
// Varies m over [1, 3] while n stays 4 and k stays 8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16632
// Varies n over [1, 4] while m stays 3 and k stays 8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16648
// Full 3x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16663
// Every (m, n) shape with m in [1, 3], n in [1, 4] for each k in [1, 7]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16683
// Full 3x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16698
// Every (m, n) shape with m in [1, 3], n in [1, 4] for each k in [9, 15]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16718
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16733
// Every (m, n) shape with m in [1, 3], n in [1, 4] for k = 16, 24, ..., 80; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16753
// n in [5, 7] (above nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16770
// n in [5, 7] with m = 3 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16788
// n in [5, 7] crossed with m in [1, 3], for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16808
// n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16825
// n = 8 and 12 with m = 3 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16843
// n = 8 and 12 crossed with m in [1, 3], for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16863
// Full 3x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16879
// ks(3) with every (m, n) shape, m in [1, 3] and n in [1, 4], for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16900
// ks(3) with n in [5, 7] and m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16918
// ks(3) with n = 8 and 12 and m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16936
// cm_stride(7) with every (m, n) shape, m in [1, 3] and n in [1, 4], for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16957
// Full 3x4 tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16974
// Same setup as the a_offset case, additionally sweeping zero_index over each row index in [0, 3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16994
// Full 3x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
17008
// Full 3x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
17022
// Full 3x4 tile at k = 8 with cm_stride(7), a stride value larger than n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
17036
// Full 3x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17052
// Full 3x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17068
// Full 3x4 tile with both a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17085 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17086
17087
17088 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises m = 3, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17101
// Exercises m = 3, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17115
// Sweeps n over [1, 4] and m over [1, 3] at k = 8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17133
// Sweeps m over [1, 3] at n = 4, k = 8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17149
// Sweeps n over [1, 4] at m = 3, k = 8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17165
// Exercises m = 3, n = 4 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17180
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17200
// Exercises m = 3, n = 4 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17215
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17235
// Exercises m = 3, n = 4 for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17250
// Sweeps k = 16, 24, ..., 80, n over [1, 4] and m over [1, 3], using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17270
// Exercises m = 3 for n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17287
// Exercises m = 3 for n in [5, 7] and k in {1, 10, 19, 28, 37} with cn_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17305
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 3], using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17325
// Exercises m = 3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17342
// Exercises m = 3 for n in {8, 12} and k in {1, 10, 19, 28, 37} with cn_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17360
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 3], using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17380
// Exercises m = 3, n = 4 for k in {1, 10, 19, 28, 37} with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17396
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 3] with ks
// set to 3, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17417
// Exercises m = 3 for n in [5, 7] and k in {1, 10, 19, 28, 37} with ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17435
// Exercises m = 3 for n in {8, 12} and k in {1, 10, 19, 28, 37} with ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17453
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 3] with
// cm_stride set to 7, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17474
// Exercises m = 3, n = 4 for k in {1, 10, 19, 28, 37} with ks set to 3 and
// a_offset set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.ks(3);
    tester.a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17491
// For k in {1, 10, 19, 28, 37}, runs m = 3, n = 4 with ks = 3 and
// a_offset = 127 once per zero_index value in [0, 2].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(4).k(kc);
      tester.ks(3);
      tester.a_offset(127);
      tester.zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17511
// Exercises m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17525
// Exercises m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17539
// Exercises m = 3, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17553
// Exercises m = 3, n = 4 for k in {1, 10, 19, 28, 37} with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17569
// Exercises m = 3, n = 4 for k in {1, 10, 19, 28, 37} with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17585
// Exercises m = 3, n = 4 for k in {1, 10, 19, 28, 37} with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17602 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17603
17604
17605 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises m = 1, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17618
// Exercises m = 1, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17632
// Sweeps n over [1, 4] and m over [1, 1] at k = 8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17650
// Sweeps m over [1, 1] at n = 4, k = 8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17666
// Sweeps n over [1, 4] at m = 1, k = 8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17682
// Exercises m = 1, n = 4 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17697
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 1], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17717
// Exercises m = 1, n = 4 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17732
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 1], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17752
// Exercises m = 1, n = 4 for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17767
// Sweeps k = 16, 24, ..., 80, n over [1, 4] and m over [1, 1], using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17787
// Exercises m = 1 for n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17804
// Exercises m = 1 for n in [5, 7] and k in {1, 10, 19, 28, 37} with cn_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17822
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 1], using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17842
// Exercises m = 1 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17859
// Exercises m = 1 for n in {8, 12} and k in {1, 10, 19, 28, 37} with cn_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17877
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 1], using
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17897
// Exercises m = 1, n = 4 for k in {1, 10, 19, 28, 37} with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17913
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 1] with ks
// set to 3, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17934
// Exercises m = 1 for n in [5, 7] and k in {1, 10, 19, 28, 37} with ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17952
// Exercises m = 1 for n in {8, 12} and k in {1, 10, 19, 28, 37} with ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17970
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 1] with
// cm_stride set to 7, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17991
// Exercises m = 1, n = 4 for k in {1, 10, 19, 28, 37} with ks set to 3 and
// a_offset set to 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18008
// For k in {1, 10, 19, 28, 37}, runs m = 1, n = 4 with ks = 3 and
// a_offset = 43 once per zero_index value in [0, 0].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(4).k(kc);
      tester.ks(3);
      tester.a_offset(43);
      tester.zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18028
// Exercises m = 1, n = 4, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18042
// Exercises m = 1, n = 4, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18056
// Exercises m = 1, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18070
// Exercises m = 1, n = 4 for k in {1, 10, 19, 28, 37} with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18086
// Exercises m = 1, n = 4 for k in {1, 10, 19, 28, 37} with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18102
// Exercises m = 1, n = 4 for k in {1, 10, 19, 28, 37} with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18119 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18120
18121
18122 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises m = 2, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18135
// Single run on a full 2x4 tile with K fixed at 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  const auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18149
// K fixed at 8; every N in [1, 4] (outer) and M in [1, 2] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      const auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18167
// K fixed at 8 and N fixed at 4; M takes each value in [1, 2], with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18183
// K fixed at 8 and M fixed at 2; N takes each value in [1, 4], with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18199
// Full 2x4 tile; K takes each value from 1 through 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18214
// K from 1 through 7 (outer), N in [1, 4], M in [1, 2] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18234
// Full 2x4 tile; K takes each value from 9 through 15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18249
// K from 9 through 15 (outer), N in [1, 4], M in [1, 2] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18269
// Full 2x4 tile; K takes the multiples of 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18284
// K over multiples of 8 from 16 through 80 (outer), N in [1, 4], M in [1, 2], each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18304
// M fixed at 2; N in {5, 6, 7} (outer) crossed with K in {1, 10, 19, 28, 37} (inner).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18321
// M fixed at 2 with cn_stride(7); N in {5, 6, 7} (outer) crossed with K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18339
// N in {5, 6, 7} (outer), K in {1, 10, 19, 28, 37}, M in [1, 2] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18359
// M fixed at 2; N in {8, 12} (outer) crossed with K in {1, 10, 19, 28, 37} (inner).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18376
// M fixed at 2 with cn_stride(7); N in {8, 12} (outer) crossed with K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18394
// N in {8, 12} (outer), K in {1, 10, 19, 28, 37}, M in [1, 2] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18414
// Full 2x4 tile with ks(3); K takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18430
// ks(3) with iterations(1); K in {1, 10, 19, 28, 37} (outer), N in [1, 4], M in [1, 2] (inner).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18451
// M fixed at 2 with ks(3); N in {5, 6, 7} (outer) crossed with K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18469
// M fixed at 2 with ks(3); N in {8, 12} (outer) crossed with K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18487
// cm_stride(7) with iterations(1); K in {1, 10, 19, 28, 37} (outer), N in [1, 4], M in [1, 2] (inner).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18508
// Full 2x4 tile with ks(3) and a_offset(83); K takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18525
// ks(3) and a_offset(83); K in {1, 10, 19, 28, 37} (outer) crossed with zero_index 0 and 1 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      const auto tester = GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_dim)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18545
// Single run on a full 2x4 tile with K fixed at 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  const auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18559
// Single run on a full 2x4 tile with K fixed at 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  const auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18573
// Single run on a full 2x4 tile with K fixed at 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  const auto tester = GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18587
// Full 2x4 tile with a_zero_point(0), K taking the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18603
// Full 2x4 tile with b_zero_point(0), K taking the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18619
// Full 2x4 tile with both a_zero_point(0) and b_zero_point(0), K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18636 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18637
18638
18639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a full 3x4 tile with K fixed at 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  const auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18652
// Single run on a full 3x4 tile with K fixed at 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  const auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18666
// K fixed at 8; every N in [1, 4] (outer) and M in [1, 3] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      const auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18684
// K fixed at 8 and N fixed at 4; M takes each value in [1, 3], with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18700
// K fixed at 8 and M fixed at 3; N takes each value in [1, 4], with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18716
// Full 3x4 tile; K takes each value from 1 through 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18731
// K from 1 through 7 (outer), N in [1, 4], M in [1, 3] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18751
// Full 3x4 tile; K takes each value from 9 through 15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18766
// K from 9 through 15 (outer), N in [1, 4], M in [1, 3] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18786
// Full 3x4 tile; K takes the multiples of 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18801
// K over multiples of 8 from 16 through 80 (outer), N in [1, 4], M in [1, 3], each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18821
// M fixed at 3; N in {5, 6, 7} (outer) crossed with K in {1, 10, 19, 28, 37} (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18838
// M fixed at 3 with cn_stride(7); N in {5, 6, 7} (outer) crossed with K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18856
// N in {5, 6, 7} (outer), K in {1, 10, 19, 28, 37}, M in [1, 3] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18876
// M fixed at 3; N in {8, 12} (outer) crossed with K in {1, 10, 19, 28, 37} (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18893
// M fixed at 3 with cn_stride(7); N in {8, 12} (outer) crossed with K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18911
// N in {8, 12} (outer), K in {1, 10, 19, 28, 37}, M in [1, 3] (inner), each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18931
// Full 3x4 tile with ks(3); K takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18947
// ks(3) with iterations(1); K in {1, 10, 19, 28, 37} (outer), N in [1, 4], M in [1, 3] (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18968
// M fixed at 3 with ks(3); N in {5, 6, 7} (outer) crossed with K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18986
// M fixed at 3 with ks(3); N in {8, 12} (outer) crossed with K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      const auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19004
// cm_stride(7) with iterations(1); K in {1, 10, 19, 28, 37} (outer), N in [1, 4], M in [1, 3] (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        const auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19025
// Full 3x4 tile with ks(3) and a_offset(127); K takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19042
// ks(3) and a_offset(127); K in {1, 10, 19, 28, 37} (outer) crossed with zero_index 0, 1, 2 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      const auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19062
// Single run on a full 3x4 tile with K fixed at 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  const auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19076
// Single run on a full 3x4 tile with K fixed at 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  const auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19090
// Single run on a full 3x4 tile with K fixed at 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  const auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19104
// Full 3x4 tile with a_zero_point(0), K taking the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19120
// Full 3x4 tile with b_zero_point(0), K taking the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19136
// Full 3x4 tile with both a_zero_point(0) and b_zero_point(0), K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    const auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19153 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19154
19155
19156 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a full 4x4 tile with K fixed at 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  const auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19169
// Single run on a full 4x4 tile with K fixed at 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  const auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19183
// 4x4c2s4 XOP kernel: k=8, every (n, m) pair with n in 1..4 and m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19201
// 4x4c2s4 XOP kernel: k=8, n=4, m swept over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19217
// 4x4c2s4 XOP kernel: k=8, m=4, n swept over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_dim).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19233
// 4x4c2s4 XOP kernel: m=4, n=4, k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19248
// 4x4c2s4 XOP kernel: k in 1..7, then n in 1..4, then m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19268
// 4x4c2s4 XOP kernel: m=4, n=4, k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19283
// 4x4c2s4 XOP kernel: k in 9..15, then n in 1..4, then m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19303
// 4x4c2s4 XOP kernel: m=4, n=4, k swept over 16..80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19318
// 4x4c2s4 XOP kernel: k in 16..80 step 8, then n in 1..4, then m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19338
// 4x4c2s4 XOP kernel: m=4, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19355
// 4x4c2s4 XOP kernel: m=4, n in 5..7, k in {1, 10, 19, 28, 37}, cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19373
// 4x4c2s4 XOP kernel: n in 5..7, then k in {1, 10, 19, 28, 37}, then m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19393
// 4x4c2s4 XOP kernel: m=4, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19410
// 4x4c2s4 XOP kernel: m=4, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19428
// 4x4c2s4 XOP kernel: n in {8, 12}, then k in {1, 10, 19, 28, 37}, then m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19448
// 4x4c2s4 XOP kernel: m=4, n=4, ks=3, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19464
// 4x4c2s4 XOP kernel: ks=3; k in {1, 10, 19, 28, 37}, then n in 1..4, then m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19485
// 4x4c2s4 XOP kernel: m=4, ks=3, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19503
// 4x4c2s4 XOP kernel: m=4, ks=3, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19521
// 4x4c2s4 XOP kernel: cm_stride=7; k in {1, 10, 19, 28, 37}, then n in 1..4, then m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19542
// 4x4c2s4 XOP kernel: m=4, n=4, ks=3, a_offset=163, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19559
// 4x4c2s4 XOP kernel: ks=3, a_offset=163; k in {1, 10, 19, 28, 37}, then zero_index in 0..3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19579
// 4x4c2s4 XOP kernel: m=4, n=4, k=8 with qmin=128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19593
// 4x4c2s4 XOP kernel: m=4, n=4, k=8 with qmax=128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19607
// 4x4c2s4 XOP kernel: m=4, n=4, k=8 with cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19621
// 4x4c2s4 XOP kernel: m=4, n=4, k in {1, 10, 19, 28, 37}, a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19637
// 4x4c2s4 XOP kernel: m=4, n=4, k in {1, 10, 19, 28, 37}, b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19653
// 4x4c2s4 XOP kernel: m=4, n=4, k in {1, 10, 19, 28, 37}, both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19670 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19671
19672
19673 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 SSE2 kernel: single run with m=3, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19686
// 3x4c8 SSE2 kernel: m=3, n=4, k=8 with cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19700
// 3x4c8 SSE2 kernel: k=8, every (n, m) pair with n in 1..4 and m in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19718
// 3x4c8 SSE2 kernel: k=8, n=4, m swept over 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19734
// 3x4c8 SSE2 kernel: k=8, m=3, n swept over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(8)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19750
// 3x4c8 SSE2 kernel: m=3, n=4, k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19765
// 3x4c8 SSE2 kernel: k in 1..7, then n in 1..4, then m in 1..3; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19785
// 3x4c8 SSE2 kernel: m=3, n=4, k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19800
// 3x4c8 SSE2 kernel: k in 9..15, then n in 1..4, then m in 1..3; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19820
// 3x4c8 SSE2 kernel: m=3, n=4, k swept over 16..80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19835
// 3x4c8 SSE2 kernel: k in 16..80 step 8, then n in 1..4, then m in 1..3; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19855
// 3x4c8 SSE2 kernel: m=3, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19872
// 3x4c8 SSE2 kernel: m=3, n in 5..7, k in {1, 10, 19, 28, 37}, cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19890
// 3x4c8 SSE2 kernel: n in 5..7, then k in {1, 10, 19, 28, 37}, then m in 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19910
// 3x4c8 SSE2 kernel: m=3, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19927
// 3x4c8 SSE2 kernel: m=3, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19945
// 3x4c8 SSE2 kernel: n in {8, 12}, then k in {1, 10, 19, 28, 37}, then m in 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19965
// 3x4c8 SSE2 kernel: m=3, n=4, ks=3, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19981
// 3x4c8 SSE2 kernel: ks=3; k in {1, 10, 19, 28, 37}, then n in 1..4, then m in 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20002
// 3x4c8 SSE2 kernel: m=3, ks=3, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20020
// 3x4c8 SSE2 kernel: m=3, ks=3, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20038
// 3x4c8 SSE2 kernel: cm_stride=7; k in {1, 10, 19, 28, 37}, then n in 1..4, then m in 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20059
// 3x4c8 SSE2 kernel: m=3, n=4, ks=3, a_offset=127, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20076
// 3x4c8 SSE2 kernel: ks=3, a_offset=127; k in {1, 10, 19, 28, 37}, then zero_index in 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20096
// 3x4c8 SSE2 kernel: m=3, n=4, k=8 with qmin=128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20110
// 3x4c8 SSE2 kernel: m=3, n=4, k=8 with qmax=128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20124
// 3x4c8 SSE2 kernel: m=3, n=4, k=8 with cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20138
// 3x4c8 SSE2 kernel: m=3, n=4, k in {1, 10, 19, 28, 37}, a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20154
// 3x4c8 SSE2 kernel: m=3, n=4, k in {1, 10, 19, 28, 37}, b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20170
// 3x4c8 SSE2 kernel: m=3, n=4, k in {1, 10, 19, 28, 37}, both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
20187 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20188
20189
20190 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 SSE4.1 kernel: single run with m=3, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20203
// 3x4c8 SSE4.1 kernel: m=3, n=4, k=8 with cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
20217
// 3x4c8 SSE4.1 kernel: k=8, every (n, m) pair with n in 1..4 and m in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20235
// 3x4c8 SSE4.1 ld64 kernel: k = 8, n = 4, m swept over 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20251
// 3x4c8 SSE4.1 ld64 kernel: k = 8, m = 3, n swept over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20267
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20282
// 3x4c8 SSE4.1 ld64 kernel: k in 1..7 crossed with n in 1..4 and m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20302
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20317
// 3x4c8 SSE4.1 ld64 kernel: k in 9..15 crossed with n in 1..4 and m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20337
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20352
// 3x4c8 SSE4.1 ld64 kernel: k = 16, 24, ..., 80 crossed with n in 1..4 and
// m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20372
// 3x4c8 SSE4.1 ld64 kernel: m = 3, n in 5..7, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20389
// 3x4c8 SSE4.1 ld64 kernel: m = 3, n in 5..7, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20407
// 3x4c8 SSE4.1 ld64 kernel: n in 5..7, k = 1, 10, 19, 28, 37, m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20427
// 3x4c8 SSE4.1 ld64 kernel: m = 3, n = 8 and 12, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20444
// 3x4c8 SSE4.1 ld64 kernel: m = 3, n = 8 and 12, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20462
// 3x4c8 SSE4.1 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20482
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 1, 10, 19, 28, 37, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20498
// 3x4c8 SSE4.1 ld64 kernel: ks = 3, k = 1, 10, 19, 28, 37 crossed with
// n in 1..4 and m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20519
// 3x4c8 SSE4.1 ld64 kernel: m = 3, n in 5..7, k = 1, 10, 19, 28, 37, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20537
// 3x4c8 SSE4.1 ld64 kernel: m = 3, n = 8 and 12, k = 1, 10, 19, 28, 37, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20555
// 3x4c8 SSE4.1 ld64 kernel: cm_stride = 7, k = 1, 10, 19, 28, 37 crossed with
// n in 1..4 and m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20576
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 1, 10, 19, 28, 37, ks = 3,
// a_offset = 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20593
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 1, 10, 19, 28, 37, ks = 3,
// a_offset = 127, with zero_index swept over 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20613
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 8, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20627
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 8, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20641
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 8, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20655
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 1, 10, 19, 28, 37, with
// a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20671
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 1, 10, 19, 28, 37, with
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20687
// 3x4c8 SSE4.1 ld64 kernel: full 3x4 tile, k = 1, 10, 19, 28, 37, with both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20704 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20705
20706
20707 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c8 AVX ld64 kernel: single run on the full 1x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20720
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 8, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20734
// 1x4c8 AVX ld64 kernel: k = 8, n in 1..4, m loop covering only m = 1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20752
// 1x4c8 AVX ld64 kernel: k = 8, n = 4, m loop covering only m = 1, one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20768
// 1x4c8 AVX ld64 kernel: k = 8, m = 1, n swept over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20784
// 1x4c8 AVX ld64 kernel: full 1x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20799
// 1x4c8 AVX ld64 kernel: k in 1..7 crossed with n in 1..4 (m loop covers only
// m = 1), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20819
// 1x4c8 AVX ld64 kernel: full 1x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20834
// 1x4c8 AVX ld64 kernel: k in 9..15 crossed with n in 1..4 (m loop covers only
// m = 1), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20854
// 1x4c8 AVX ld64 kernel: full 1x4 tile for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20869
// 1x4c8 AVX ld64 kernel: k = 16, 24, ..., 80 crossed with n in 1..4 (m loop
// covers only m = 1), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20889
// 1x4c8 AVX ld64 kernel: m = 1, n in 5..7, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20906
// 1x4c8 AVX ld64 kernel: m = 1, n in 5..7, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20924
// 1x4c8 AVX ld64 kernel: n in 5..7, k = 1, 10, 19, 28, 37 (m loop covers only
// m = 1), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20944
// 1x4c8 AVX ld64 kernel: m = 1, n = 8 and 12, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20961
// 1x4c8 AVX ld64 kernel: m = 1, n = 8 and 12, k = 1, 10, 19, 28, 37, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20979
// 1x4c8 AVX ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37 (m loop covers
// only m = 1), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20999
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 1, 10, 19, 28, 37, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21015
// 1x4c8 AVX ld64 kernel: ks = 3, k = 1, 10, 19, 28, 37 crossed with n in 1..4
// (m loop covers only m = 1), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21036
// 1x4c8 AVX ld64 kernel: m = 1, n in 5..7, k = 1, 10, 19, 28, 37, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21054
// 1x4c8 AVX ld64 kernel: m = 1, n = 8 and 12, k = 1, 10, 19, 28, 37, ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21072
// 1x4c8 AVX ld64 kernel: cm_stride = 7, k = 1, 10, 19, 28, 37 crossed with
// n in 1..4 (m loop covers only m = 1), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21093
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 1, 10, 19, 28, 37, ks = 3,
// a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21110
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 1, 10, 19, 28, 37, ks = 3,
// a_offset = 43; the zero_index loop covers only index 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21130
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 8, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21144
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 8, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21158
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 8, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21172
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 1, 10, 19, 28, 37, with
// a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21188
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 1, 10, 19, 28, 37, with
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21204
// 1x4c8 AVX ld64 kernel: full 1x4 tile, k = 1, 10, 19, 28, 37, with both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21221 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21222
21223
21224 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c8 XOP ld64 kernel: single run on the full 1x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21237
// 1x4c8 XOP ld64 kernel: full 1x4 tile, k = 8, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21251
// 1x4c8 XOP ld64 kernel: k = 8, n in 1..4, m loop covering only m = 1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21269
// 1x4c8 XOP ld64 kernel: k = 8, n = 4, m loop covering only m = 1, one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21285
// m=1 and k=8 fixed; n swept over [1, 4], with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21301
// Full output tile (m=1, n=4) with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21316
// k swept over 1..7; for each k, every n in [1, 4] and m in [1, 1],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21336
// Full output tile (m=1, n=4) with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21351
// k swept over 9..15; for each k, every n in [1, 4] and m in [1, 1],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21371
// Full output tile (m=1, n=4) with k stepping from 16 to 80 in increments of 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21386
// k stepping from 16 to 80 by 8; for each k, every n in [1, 4] and m in [1, 1],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21406
// n swept over 5..7 (above nr=4); for each n, k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21423
// n swept over 5..7 and k stepping from 1 to 40 by 9, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21441
// n swept over 5..7, k stepping from 1 to 40 by 9, m over [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21461
// n takes the values 8 and 12; for each n, k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21478
// n takes the values 8 and 12, k steps from 1 to 40 by 9, cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21496
// n takes the values 8 and 12, k steps from 1 to 40 by 9, m over [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21516
// Full output tile (m=1, n=4) with ks set to 3; k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21532
// ks set to 3; k steps from 1 to 40 by 9, n over [1, 4], m over [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21553
// ks set to 3; n swept over 5..7 and k stepping from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21571
// ks set to 3; n takes the values 8 and 12, k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21589
// cm_stride set to 7; k steps from 1 to 40 by 9, n over [1, 4], m over [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21610
// Full output tile with ks set to 3 and a_offset set to 43;
// k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21627
// Full output tile with ks set to 3 and a_offset set to 43; zero_index is
// swept over [0, 1) for every k stepping from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21647
// Full output tile (m=1, n=4, k=8) with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21661
// Full output tile (m=1, n=4, k=8) with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21675
// Full output tile (m=1, n=4, k=8) with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21689
// Full output tile with a_zero_point set to 0; k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21705
// Full output tile with b_zero_point set to 0; k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21721
// Full output tile with both a_zero_point and b_zero_point set to 0;
// k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21738 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21739
21740
21741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full output tile (m=1, n=4) with k equal to 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21754
// Full output tile (m=1, n=4, k=8) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21768
// k fixed at 8; visits every (n, m) with n in [1, 4] and m in [1, 1],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21786
// n=4 and k=8 fixed; m swept over [1, 1], with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21802
// m=1 and k=8 fixed; n swept over [1, 4], with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21818
// Full output tile (m=1, n=4) with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21833
// k swept over 1..7; for each k, every n in [1, 4] and m in [1, 1],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21853
// Full output tile (m=1, n=4) with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21868
// k swept over 9..15; for each k, every n in [1, 4] and m in [1, 1],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21888
// Full output tile (m=1, n=4) with k stepping from 16 to 80 in increments of 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21903
// k stepping from 16 to 80 by 8; for each k, every n in [1, 4] and m in [1, 1],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21923
// n swept over 5..7 (above nr=4); for each n, k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21940
// n swept over 5..7 and k stepping from 1 to 40 by 9, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21958
// n swept over 5..7, k stepping from 1 to 40 by 9, m over [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21978
// n takes the values 8 and 12; for each n, k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21995
// n takes the values 8 and 12, k steps from 1 to 40 by 9, cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22013
// n takes the values 8 and 12, k steps from 1 to 40 by 9, m over [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
22033
// Full output tile (m=1, n=4) with ks set to 3; k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22049
// ks set to 3; k steps from 1 to 40 by 9, n over [1, 4], m over [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
22070
// ks set to 3; n swept over 5..7 and k stepping from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22088
// ks set to 3; n takes the values 8 and 12, k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22106
// cm_stride set to 7; k steps from 1 to 40 by 9, n over [1, 4], m over [1, 1];
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
22127
// Full output tile with ks set to 3 and a_offset set to 43;
// k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22144
// Full output tile with ks set to 3 and a_offset set to 43; zero_index is
// swept over [0, 1) for every k stepping from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22164
// Full output tile (m=1, n=4, k=8) with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22178
// Full output tile (m=1, n=4, k=8) with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22192
// Full output tile (m=1, n=4, k=8) with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22206
// Full output tile with a_zero_point set to 0; k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22222
// Full output tile with b_zero_point set to 0; k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22238
// Full output tile with both a_zero_point and b_zero_point set to 0;
// k steps from 1 to 40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22255 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22256
22257
22258 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full output tile (m=2, n=4) with k equal to 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22271
// Full output tile (m=2, n=4, k=8) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22285
// k fixed at 8; visits every (n, m) with n in [1, 4] and m in [1, 2],
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22303
// n=4 and k=8 fixed; m swept over [1, 2], with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22319
// m=2 and k=8 fixed; n swept over [1, 4], with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22335
// Full output tile (m=2, n=4) with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22350
// Runs k = 1..7 crossed with n = 1..4 and m = 1..2; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22370
// Runs k = 9..15 (strictly between 8 and 16) at m = 2, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22385
// Runs k = 9..15 crossed with n = 1..4 and m = 1..2; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22405
// Runs the multiples of 8 from k = 16 through k = 80 at m = 2, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22420
// Runs k = 16, 24, ..., 80 crossed with n = 1..4 and m = 1..2; iterations set
// to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22440
// Runs n = 5..7 crossed with k = 1, 10, 19, 28, 37 at m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22457
// Runs n = 5..7 crossed with k = 1, 10, 19, 28, 37 at m = 2, with cn_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22475
// Runs n = 5..7 crossed with k = 1, 10, 19, 28, 37 and m = 1..2; iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22495
// Runs n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 at m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22512
// Runs n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 at m = 2, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22530
// Runs n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 and m = 1..2;
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22550
// Runs k = 1, 10, 19, 28, 37 at m = 2, n = 4 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22566
// Runs k = 1, 10, 19, 28, 37 crossed with n = 1..4 and m = 1..2, with ks set
// to 3 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22587
// Runs n = 5..7 crossed with k = 1, 10, 19, 28, 37 at m = 2, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22605
// Runs n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 at m = 2, with ks
// set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22623
// Runs k = 1, 10, 19, 28, 37 crossed with n = 1..4 and m = 1..2, with
// cm_stride set to 7 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22644
// Runs k = 1, 10, 19, 28, 37 at m = 2, n = 4 with ks set to 3 and a_offset
// set to 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22661
// Runs k = 1, 10, 19, 28, 37 crossed with zero_index = 0..1, at m = 2, n = 4
// with ks set to 3 and a_offset set to 83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22681
// Single run at m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22695
// Single run at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22709
// Single run at m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22723
// Runs k = 1, 10, 19, 28, 37 at m = 2, n = 4 with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22739
// Runs k = 1, 10, 19, 28, 37 at m = 2, n = 4 with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22755
// Runs k = 1, 10, 19, 28, 37 at m = 2, n = 4 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22772 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22773
22774
22775 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m, n and k equal to the configured mr, nr and kr (3, 4, 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22788
// Single run at m = 3, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22802
// Runs every (n, m) pair with n = 1..4 and m = 1..3 at k = 8, with iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22820
// Varies only m (1..3) while n = 4 and k = 8 stay fixed; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22836
// Varies only n (1..4) while m = 3 and k = 8 stay fixed; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22852
// Runs k = 1..7 (every value below 8) at m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22867
// Runs k = 1..7 crossed with n = 1..4 and m = 1..3; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22887
// Runs k = 9..15 (strictly between 8 and 16) at m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22902
// Runs k = 9..15 crossed with n = 1..4 and m = 1..3; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22922
// Runs the multiples of 8 from k = 16 through k = 80 at m = 3, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22937
// Runs k = 16, 24, ..., 80 crossed with n = 1..4 and m = 1..3; iterations set
// to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22957
// Runs n = 5..7 crossed with k = 1, 10, 19, 28, 37 at m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22974
// Runs n = 5..7 crossed with k = 1, 10, 19, 28, 37 at m = 3, with cn_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22992
// Runs n = 5..7 crossed with k = 1, 10, 19, 28, 37 and m = 1..3; iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23012
// Runs n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 at m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23029
// Runs n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 at m = 3, with
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23047
// Runs n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 and m = 1..3;
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23067
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23083
// Runs k = 1, 10, 19, 28, 37 crossed with n = 1..4 and m = 1..3, with ks set
// to 3 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23104
// Runs n = 5..7 crossed with k = 1, 10, 19, 28, 37 at m = 3, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23122
// Runs n = 8 and n = 12 crossed with k = 1, 10, 19, 28, 37 at m = 3, with ks
// set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23140
// Runs k = 1, 10, 19, 28, 37 crossed with n = 1..4 and m = 1..3, with
// cm_stride set to 7 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23161
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with ks set to 3 and a_offset
// set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23178
// Runs k = 1, 10, 19, 28, 37 crossed with zero_index = 0..2, at m = 3, n = 4
// with ks set to 3 and a_offset set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23198
// Single run at m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23212
// Single run at m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23226
// Single run at m = 3, n = 4, k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23240
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23256
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23272
// Runs k = 1, 10, 19, 28, 37 at m = 3, n = 4 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23289 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23290
23291
23292 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m, n and k equal to the configured mr, nr and kr (1, 4, 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23305
// Single run at m = 1, n = 4, k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
23319
// Runs n = 1..4 at k = 8 with iterations set to 1; the m loop covers the
// single value m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
23337
// Loops over m, which here covers the single value m = 1, with n = 4 and
// k = 8 fixed; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23353
// Varies only n (1..4) while m = 1 and k = 8 stay fixed; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23369
// Runs k = 1..7 (every value below 8) at m = 1, n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
23384
// Runs k = 1..7 crossed with n = 1..4, with iterations set to 1; the m loop
// covers the single value m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
23404
// 1x4c8 XOP ld128 kernel: m = 1, n = 4, with k taking every value in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23419
// 1x4c8 XOP ld128 kernel: every combination of k in [9, 15], n in [1, 4] and
// m in [1, 1]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23439
// 1x4c8 XOP ld128 kernel: m = 1, n = 4, with k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23454
// 1x4c8 XOP ld128 kernel: every combination of k = 16, 24, ..., 80,
// n in [1, 4] and m in [1, 1]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23474
// 1x4c8 XOP ld128 kernel: m = 1, with n in [5, 7] combined with
// k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23491
// 1x4c8 XOP ld128 kernel: m = 1, with n in [5, 7] combined with
// k = 1, 10, 19, 28, 37, and cn_stride(7) applied to every case.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23509
// 1x4c8 XOP ld128 kernel: every combination of n in [5, 7],
// k = 1, 10, 19, 28, 37 and m in [1, 1]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23529
// 1x4c8 XOP ld128 kernel: m = 1, with n = 8, 12 combined with
// k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23546
// 1x4c8 XOP ld128 kernel: m = 1, with n = 8, 12 combined with
// k = 1, 10, 19, 28, 37, and cn_stride(7) applied to every case.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23564
// 1x4c8 XOP ld128 kernel: every combination of n = 8, 12,
// k = 1, 10, 19, 28, 37 and m in [1, 1]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23584
// 1x4c8 XOP ld128 kernel: m = 1, n = 4 and ks(3), with
// k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23600
// 1x4c8 XOP ld128 kernel with ks(3): every combination of
// k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 1]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23621
// 1x4c8 XOP ld128 kernel with ks(3): m = 1, with n in [5, 7] combined with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23639
// 1x4c8 XOP ld128 kernel with ks(3): m = 1, with n = 8, 12 combined with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23657
// 1x4c8 XOP ld128 kernel with cm_stride(7): every combination of
// k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 1]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23678
// 1x4c8 XOP ld128 kernel: m = 1, n = 4, ks(3) and a_offset(43), with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23695
// 1x4c8 XOP ld128 kernel: m = 1, n = 4, ks(3) and a_offset(43), combining
// k = 1, 10, 19, 28, 37 with every zero_index value in [0, 0].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23715
// 1x4c8 XOP ld128 kernel: single case m = 1, n = 4, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23729
// 1x4c8 XOP ld128 kernel: single case m = 1, n = 4, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23743
// 1x4c8 XOP ld128 kernel: single case m = 1, n = 4, k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23757
// 1x4c8 XOP ld128 kernel: m = 1, n = 4 and a_zero_point(0), with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23773
// 1x4c8 XOP ld128 kernel: m = 1, n = 4 and b_zero_point(0), with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23789
// 1x4c8 XOP ld128 kernel: m = 1, n = 4 with both a_zero_point(0) and
// b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23806 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23807
23808
23809 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c8 XOP ld128 kernel: single case m = 2, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23822
// 2x4c8 XOP ld128 kernel: single case m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
23836
// 2x4c8 XOP ld128 kernel: k = 8 with every combination of n in [1, 4] and
// m in [1, 2]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
23854
// 2x4c8 XOP ld128 kernel: n = 4 and k = 8 are fixed while m takes every value
// in [1, 2]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23870
// 2x4c8 XOP ld128 kernel: m = 2 and k = 8 are fixed while n takes every value
// in [1, 4]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23886
// 2x4c8 XOP ld128 kernel: m = 2, n = 4, with k taking every value in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23901
// 2x4c8 XOP ld128 kernel: every combination of k in [1, 7], n in [1, 4] and
// m in [1, 2]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23921
// 2x4c8 XOP ld128 kernel: m = 2, n = 4, with k taking every value in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23936
// 2x4c8 XOP ld128 kernel: every combination of k in [9, 15], n in [1, 4] and
// m in [1, 2]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23956
// 2x4c8 XOP ld128 kernel: m = 2, n = 4, with k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
23971
// 2x4c8 XOP ld128 kernel: every combination of k = 16, 24, ..., 80,
// n in [1, 4] and m in [1, 2]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
23991
// 2x4c8 XOP ld128 kernel: m = 2, with n in [5, 7] combined with
// k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24008
// 2x4c8 XOP ld128 kernel: m = 2, with n in [5, 7] combined with
// k = 1, 10, 19, 28, 37, and cn_stride(7) applied to every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24026
// 2x4c8 XOP ld128 kernel: every combination of n in [5, 7],
// k = 1, 10, 19, 28, 37 and m in [1, 2]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24046
// 2x4c8 XOP ld128 kernel: m = 2, with n = 8, 12 combined with
// k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24063
// 2x4c8 XOP ld128 kernel: m = 2, with n = 8, 12 combined with
// k = 1, 10, 19, 28, 37, and cn_stride(7) applied to every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24081
// 2x4c8 XOP ld128 kernel: every combination of n = 8, 12,
// k = 1, 10, 19, 28, 37 and m in [1, 2]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24101
// 2x4c8 XOP ld128 kernel: m = 2, n = 4 and ks(3), with
// k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24117
// 2x4c8 XOP ld128 kernel with ks(3): every combination of
// k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 2]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24138
// 2x4c8 XOP ld128 kernel with ks(3): m = 2, with n in [5, 7] combined with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24156
// 2x4c8 XOP ld128 kernel with ks(3): m = 2, with n = 8, 12 combined with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24174
// 2x4c8 XOP ld128 kernel with cm_stride(7): every combination of
// k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 2]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24195
// 2x4c8 XOP ld128 kernel: m = 2, n = 4, ks(3) and a_offset(83), with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24212
// 2x4c8 XOP ld128 kernel: m = 2, n = 4, ks(3) and a_offset(83), combining
// k = 1, 10, 19, 28, 37 with every zero_index value in [0, 1].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24232
// 2x4c8 XOP ld128 kernel: single case m = 2, n = 4, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24246
// 2x4c8 XOP ld128 kernel: single case m = 2, n = 4, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24260
// 2x4c8 XOP ld128 kernel: single case m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24274
// 2x4c8 XOP ld128 kernel: m = 2, n = 4 and a_zero_point(0), with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24290
// 2x4c8 XOP ld128 kernel: m = 2, n = 4 and b_zero_point(0), with
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24306
// 2x4c8 XOP ld128 kernel: m = 2, n = 4 with both a_zero_point(0) and
// b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24323 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24324
24325
24326 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 AVX ld128 kernel: single case m = 3, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24339
// 3x4c8 AVX ld128 kernel: single case m = 3, n = 4, k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24353
// 3x4c8 AVX ld128 kernel: k = 8 with every combination of n in [1, 4] and
// m in [1, 3]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24371
// 3x4c8 AVX ld128 kernel: n = 4 and k = 8 are fixed while m takes every value
// in [1, 3]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24387
// 3x4c8 AVX ld128 kernel: m = 3 and k = 8 are fixed while n takes every value
// in [1, 4]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24403
// 3x4c8 AVX ld128 kernel: m = 3, n = 4, with k taking every value in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24418
// 3x4c8 AVX ld128 kernel: every combination of k in [1, 7], n in [1, 4] and
// m in [1, 3]; each case runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24438
// 3x4c8 AVX ld128 kernel: m = 3, n = 4, with k taking every value in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24453
// k_gt_8_subtile: for k in 9..15, sweeps n over 1..4 and m over 1..3,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24473
// k_div_8: m = 3, n = 4, with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24488
// k_div_8_subtile: for k in 16, 24, ..., 80, sweeps n over 1..4 and m over
// 1..3, with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24508
// n_gt_4: m = 3 with n in 5..7, each combined with k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24525
// n_gt_4_strided_cn: same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4,
// but with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24543
// n_gt_4_subtile: n in 5..7, k in 1, 10, ..., 37 and m in 1..3,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24563
// n_div_4: m = 3 with n in {8, 12}, each combined with k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24580
// n_div_4_strided_cn: n in {8, 12} and k in 1, 10, ..., 37 at m = 3,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24598
// n_div_4_subtile: n in {8, 12}, k in 1, 10, ..., 37 and m in 1..3,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24618
// small_kernel: m = 3, n = 4 and ks = 3, with k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24634
// small_kernel_subtile: ks = 3 with k in 1, 10, ..., 37, n in 1..4 and
// m in 1..3; iterations is set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24655
// n_gt_4_small_kernel: ks = 3 at m = 3, with n in 5..7 and
// k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24673
// n_div_4_small_kernel: ks = 3 at m = 3, with n in {8, 12} and
// k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24691
// strided_cm_subtile: cm_stride = 7 with k in 1, 10, ..., 37, n in 1..4 and
// m in 1..3; iterations is set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24712
// a_offset: m = 3, n = 4, ks = 3 and a_offset = 127, with k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).ks(3).a_offset(127);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24729
// zero: same setup as a_offset (ks = 3, a_offset = 127) but additionally
// sets zero_index to each of 0, 1, 2 for every k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(k_size).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24749
// qmin: single m = 3, n = 4, k = 8 run with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
24763
// qmax: single m = 3, n = 4, k = 8 run with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
24777
// strided_cm: single m = 3, n = 4, k = 8 run with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
24791
// no_a_zero_point: m = 3, n = 4 with a_zero_point forced to 0,
// for k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24807
// no_b_zero_point: m = 3, n = 4 with b_zero_point forced to 0,
// for k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24823
// no_zero_point: m = 3, n = 4 with both a_zero_point and b_zero_point
// forced to 0, for k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24840 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24841
24842
24843 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m = 2 and n = 8 (matching mr and nr) and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
24856
// strided_cn: single m = 2, n = 8, k = 8 run with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).cn_stride(11);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
24870
// k_eq_8_subtile: at k = 8, sweeps n over 1..8 and m over 1..2,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24888
// k_eq_8_subtile_m: holds n = 8 and k = 8 fixed and sweeps m over 1..2,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(m_size).n(8).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
24904
// k_eq_8_subtile_n: holds m = 2 and k = 8 fixed and sweeps n over 1..8,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(n_size).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
24920
// k_lt_8: m = 2, n = 8, with k taking every value below 8 (1..7).
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
24935
// k_lt_8_subtile: for k in 1..7, sweeps n over 1..8 and m over 1..2,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24955
// k_gt_8: m = 2, n = 8, with k taking every value in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
24970
// k_gt_8_subtile: for k in 9..15, sweeps n over 1..8 and m over 1..2,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24990
// k_div_8: m = 2, n = 8, with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25005
// k_div_8_subtile: for k in 16, 24, ..., 80, sweeps n over 1..8 and m over
// 1..2, with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25025
// n_gt_8: m = 2 with n in 9..15, each combined with k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25042
// n_gt_8_strided_cn: n in 9..15 and k in 1, 10, ..., 37 at m = 2,
// with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25060
// n_gt_8_subtile: n in 9..15, k in 1, 10, ..., 37 and m in 1..2,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25080
// n_div_8: m = 2 with n in {16, 24}, each combined with k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25097
// n_div_8_strided_cn: n in {16, 24} and k in 1, 10, ..., 37 at m = 2,
// with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25115
// n_div_8_subtile: n in {16, 24}, k in 1, 10, ..., 37 and m in 1..2,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25135
// small_kernel: m = 2, n = 8 and ks = 3, with k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_size).ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25151
// small_kernel_subtile: ks = 3 with k in 1, 10, ..., 37, n in 1..8 and
// m in 1..2; iterations is set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25172
// n_gt_8_small_kernel: ks = 3 at m = 2, with n in 9..15 and
// k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25190
// n_div_8_small_kernel: ks = 3 at m = 2, with n in {16, 24} and
// k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25208
// strided_cm_subtile: cm_stride = 11 with k in 1, 10, ..., 37, n in 1..8 and
// m in 1..2; iterations is set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(11).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25229
// a_offset: m = 2, n = 8, ks = 3 and a_offset = 83, with k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_size).ks(3).a_offset(83);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25246
// zero: same setup as a_offset (ks = 3, a_offset = 83) but additionally
// sets zero_index to each of 0, 1 for every k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(8).k(k_size).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25266
// qmin: single m = 2, n = 8, k = 8 run with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25280
// qmax: single m = 2, n = 8, k = 8 run with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25294
// strided_cm: single m = 2, n = 8, k = 8 run with cm_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8).cm_stride(11);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25308
// no_a_zero_point: m = 2, n = 8 with a_zero_point forced to 0,
// for k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_size).a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25324
// no_b_zero_point: m = 2, n = 8 with b_zero_point forced to 0,
// for k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_size).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25340
// no_zero_point: m = 2, n = 8 with both a_zero_point and b_zero_point
// forced to 0, for k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_size).a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25357 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25358
25359
25360 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m = 3 and n = 8 (matching mr and nr) and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25373
// strided_cn: single m = 3, n = 8, k = 8 run with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).cn_stride(11);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25387
// k_eq_8_subtile: at k = 8, sweeps n over 1..8 and m over 1..3,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25405
// k_eq_8_subtile_m: holds n = 8 and k = 8 fixed and sweeps m over 1..3,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(m_size).n(8).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25421
// k_eq_8_subtile_n: holds m = 3 and k = 8 fixed and sweeps n over 1..8,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(n_size).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25437
// k_lt_8: m = 3, n = 8, with k taking every value below 8 (1..7).
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25452
// k_lt_8_subtile: for k in 1..7, sweeps n over 1..8 and m over 1..3,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25472
// k_gt_8: m = 3, n = 8, with k taking every value in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25487
// k_gt_8_subtile: for k in 9..15, sweeps n over 1..8 and m over 1..3,
// with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25507
// k_div_8: m = 3, n = 8, with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25522
// k_div_8_subtile: for k in 16, 24, ..., 80, sweeps n over 1..8 and m over
// 1..3, with iterations set to 1 for every case.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25542
// Tests m = 3 with n in [9, 15] (above nr = 8) and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25559
// Same sweep as n_gt_8 (n in [9, 15], k = 1, 10, ..., 37) with cn_stride
// set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25577
// For n in [9, 15] and k = 1, 10, ..., 37, tests every m in [1, 3] with a
// single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25597
// Tests m = 3 with n = 16 and n = 24 (multiples of nr = 8) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25614
// Same sweep as n_div_8 (n = 16, 24; k = 1, 10, ..., 37) with cn_stride
// set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25632
// For n = 16, 24 and k = 1, 10, ..., 37, tests every m in [1, 3] with a
// single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25652
// Full 3x8 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25668
// With ks set to 3, tests every (n, m) with n in [1, 8] and m in [1, 3]
// for k = 1, 10, ..., 37; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25689
// With ks set to 3, tests m = 3 for n in [9, 15] and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25707
// With ks set to 3, tests m = 3 for n = 16, 24 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25725
// With cm_stride set to 11, tests every (n, m) with n in [1, 8] and m in
// [1, 3] for k = 1, 10, ..., 37; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25746
// Full 3x8 tile with ks set to 3 and a_offset set to 127, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25763
// Full 3x8 tile with ks = 3 and a_offset = 127; for each k in
// {1, 10, 19, 28, 37} the zero_index is swept over 0, 1, 2.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(8).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25783
// Full 3x8 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25797
// Full 3x8 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25811
// Full 3x8 tile at k = 8 with cm_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
25825
// Full 3x8 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25841
// Full 3x8 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25857
// Full 3x8 tile with both a_zero_point and b_zero_point set to 0, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
25874 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25875
25876
25877 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x16c8 AVX512SKX kernel on a full 3x16 tile with k = 8 (= kr).
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
25890
// Full 3x16 tile at k = 8 with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
25904
// At k = 8, tests every (n, m) with n in [1, 16] and m in [1, 3], one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
25922
// At k = 8 and n = 16, tests every m in [1, 3] with a single iteration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(m_size).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
25938
// At k = 8 and m = 3, tests every n in [1, 16] with a single iteration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
25954
// Full 3x16 tile for every k in [1, 7] (below kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
25969
// For every k in [1, 7], tests every (n, m) with n in [1, 16] and m in
// [1, 3]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
25989
// Full 3x16 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26004
// For every k in [9, 15], tests every (n, m) with n in [1, 16] and m in
// [1, 3]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26024
// Full 3x16 tile for k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26039
// For k = 16, 24, ..., 80, tests every (n, m) with n in [1, 16] and m in
// [1, 3]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26059
// Tests m = 3 with n in [17, 31] (above nr = 16) and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26076
// Same sweep as n_gt_16 (n in [17, 31], k = 1, 10, ..., 37) with cn_stride
// set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26094
// For n in [17, 31] and k = 1, 10, ..., 37, tests every m in [1, 3] with a
// single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26114
// Tests m = 3 with n = 32 and n = 48 (multiples of nr = 16) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26131
// Same sweep as n_div_16 (n = 32, 48; k = 1, 10, ..., 37) with cn_stride
// set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26149
// For n = 32, 48 and k = 1, 10, ..., 37, tests every m in [1, 3] with a
// single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26169
// Full 3x16 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26185
// With ks set to 3, tests every (n, m) with n in [1, 16] and m in [1, 3]
// for k = 1, 10, ..., 37; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26206
// With ks set to 3, tests m = 3 for n in [17, 31] and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26224
// With ks set to 3, tests m = 3 for n = 32, 48 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26242
// With cm_stride set to 19, tests every (n, m) with n in [1, 16] and m in
// [1, 3] for k = 1, 10, ..., 37; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26263
// Full 3x16 tile with ks set to 3 and a_offset set to 127, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26280
// Full 3x16 tile with ks = 3 and a_offset = 127; for each k in
// {1, 10, 19, 28, 37} the zero_index is swept over 0, 1, 2.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(16).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26300
// Full 3x16 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26314
// Full 3x16 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26328
// Full 3x16 tile at k = 8 with cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
26342
// Full 3x16 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26358
// Full 3x16 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26374
// Full 3x16 tile with both a_zero_point and b_zero_point set to 0, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
26391 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26392
26393
26394 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 1x4c2 WAsm SIMD dot16x2 ld128 kernel on a full 1x4 tile, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
26406
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
26419
// At k = 8, tests every n in [1, 4]; the m loop covers only m = 1 because
// mr is 1. One iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26436
// At k = 8 and n = 4, loops over m in [1, 1] (a single pass) with one
// iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26451
// At k = 8 and m = 1, tests every n in [1, 4] with a single iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26466
// Full 1x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26480
// For every k in [1, 7], tests every n in [1, 4]; the m loop covers only
// m = 1. One iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26499
// Full 1x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26513
// For every k in [9, 15], tests every n in [1, 4]; the m loop covers only
// m = 1. One iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26532
// Full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26546
// For k = 16, 24, ..., 80, tests every n in [1, 4]; the m loop covers only
// m = 1. One iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26565
// Tests m = 1 with n in [5, 7] (above nr = 4) and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26581
// n = 5..7 with k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26598
// Subtile sweep for n = 5..7: k = 1, 10, 19, 28, 37 and m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26617
// n a multiple of nr = 4: n = 8 and 12, with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26633
// n = 8 and 12 with k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26650
// Subtile sweep for n = 8 and 12: k = 1, 10, 19, 28, 37 and m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26669
// Full 1x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26684
// Subtile sweep with ks set to 3: k = 1, 10, 19, 28, 37, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26704
// n = 5..7 with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26721
// n = 8 and 12 with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26738
// Subtile sweep with cm_stride set to 7: k = 1, 10, 19, 28, 37, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26758
// Full 1x4 tile with ks set to 3 and a_offset set to 43, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26774
// Same configuration as the a_offset case (ks = 3, a_offset = 43), additionally
// setting zero_index to each mz in [0, 1), i.e. only mz = 0 for this mr = 1 kernel.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26793
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26806
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26819
// Full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26832
// Full 1x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26847
// Full 1x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26862
// Full 1x4 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26878 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26879
26880
26881 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 1x4 tile at k = 8 for the c2s4 (kr = 2, sr = 4) kernel.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26893
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26906
// Subtile sweep at k = 8: n = 1..4 and m = 1, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26923
// Subtile sweep over m only at k = 8, n = 4; the loop covers just m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26938
// Subtile sweep over n only at k = 8, m = 1: n = 1..4, one tester iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26953
// Full 1x4 tile with k below 8: k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26967
// Subtile sweep with k below 8: k = 1..7, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26986
// Full 1x4 tile with k just above 8: k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27000
// Subtile sweep with k just above 8: k = 9..15, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27019
// Full 1x4 tile with k a multiple of 8: k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27033
// Subtile sweep with k a multiple of 8: k = 16, 24, ..., 80, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27052
// n above the nr = 4 tile width: n = 5..7, with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
27068
// n = 5..7 with k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
27085
// Subtile sweep for n = 5..7: k = 1, 10, 19, 28, 37 and m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27104
// n a multiple of nr = 4: n = 8 and 12, with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
27120
// n = 8 and 12 with k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
27137
// Subtile sweep for n = 8 and 12: k = 1, 10, 19, 28, 37 and m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27156
// Full 1x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27171
// Subtile sweep with ks set to 3: k = 1, 10, 19, 28, 37, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27191
// n = 5..7 with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
27208
// n = 8 and 12 with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
27225
// Subtile sweep with cm_stride set to 7: k = 1, 10, 19, 28, 37, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27245
// Full 1x4 tile with ks set to 3 and a_offset set to 43, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27261
// Same configuration as the a_offset case (ks = 3, a_offset = 43), additionally
// setting zero_index to each mz in [0, 1), i.e. only mz = 0 for this mr = 1 kernel.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
27280
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
27293
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
27306
// Full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
27319
// Full 1x4 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27334
// Full 1x4 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27349
// Full 1x4 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27365 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27366
27367
27368 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 1x4 tile at k = 8 for the c8 (kr = 8, sr = 1) ld64 kernel.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
27380
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
27393
// Subtile sweep at k = 8: n = 1..4 and m = 1, one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
27410
// Subtile sweep over m only at k = 8, n = 4; the loop covers just m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27425
// Subtile sweep over n only at k = 8, m = 1: n = 1..4, one tester iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27440
// Full 1x4 tile with k below 8: k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27454
// Subtile sweep with k below 8: k = 1..7, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27473
// Full 1x4 tile with k just above 8: k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27487
// Subtile sweep with k just above 8: k = 9..15, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27506
// Full 1x4 tile with k a multiple of 8: k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
27520
// Subtile sweep with k a multiple of 8: k = 16, 24, ..., 80, n = 1..4, m = 1,
// one tester iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27539
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // n in 5..7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37; m fixed at 1.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
27555
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // Same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4, with cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
27572
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n in 5..7, k = 1, 10, ..., 37 and m in 1..1, iterations(1) per combination.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
27591
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n = 8 and 12 (multiples of nr = 4) crossed with k = 1, 10, ..., 37; m fixed at 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
27607
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // Same n (8, 12) and k (1, 10, ..., 37) sweep as n_div_4, with cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
27624
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n = 8, 12; k = 1, 10, ..., 37; m in 1..1; iterations(1) per combination.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
27643
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // Full 1x4 (m x n) problem with ks(3), for k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27658
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // ks(3) with k = 1, 10, ..., 37 crossed with n in 1..4 and m in 1..1, iterations(1) each.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
27678
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // ks(3) with n in 5..7 crossed with k = 1, 10, ..., 37; m fixed at 1.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
27695
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // ks(3) with n = 8, 12 crossed with k = 1, 10, ..., 37; m fixed at 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
27712
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // cm_stride(7) with k = 1, 10, ..., 37 crossed with n in 1..4 and m in 1..1, iterations(1) each.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
27732
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  // ks(3) and a_offset(43) on a full 1x4 (m x n) problem, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27748
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  // ks(3), a_offset(43) and zero_index in 0..0, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
27767
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single 1x4x8 (m x n x k) run with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
27780
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single 1x4x8 (m x n x k) run with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
27793
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single 1x4x8 (m x n x k) run with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
27806
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  // a_zero_point(0) on a full 1x4 (m x n) problem, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27821
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  // b_zero_point(0) on a full 1x4 (m x n) problem, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27836
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  // Both a_zero_point(0) and b_zero_point(0), for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27852 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27853
27854
27855 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single 1x4x8 (m x n x k) run: m = mr, n = nr, k = kr.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
27867
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single 1x4x8 (m x n x k) run with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
27880
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k fixed at 8; every n in 1..4 crossed with m in 1..1, iterations(1) each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
27897
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k = 8 and n = 4 fixed; m in 1..1, iterations(1) each.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27912
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k = 8 and m = 1 fixed; n in 1..4, iterations(1) each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27927
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Full 1x4 (m x n) problem with k stepping through 1..7 (below kr = 8).
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27941
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k in 1..7 crossed with every n in 1..4 and m in 1..1, iterations(1) each.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
27960
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Full 1x4 (m x n) problem with k stepping through 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
27974
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k in 9..15 crossed with every n in 1..4 and m in 1..1, iterations(1) each.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
27993
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Full 1x4 (m x n) problem with k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28007
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with every n in 1..4 and m in 1..1, iterations(1) each.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28026
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n in 5..7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37; m fixed at 1.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28042
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // Same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4, with cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28059
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n in 5..7, k = 1, 10, ..., 37 and m in 1..1, iterations(1) per combination.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28078
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n = 8 and 12 (multiples of nr = 4) crossed with k = 1, 10, ..., 37; m fixed at 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28094
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // Same n (8, 12) and k (1, 10, ..., 37) sweep as n_div_4, with cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28111
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n = 8, 12; k = 1, 10, ..., 37; m in 1..1; iterations(1) per combination.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28130
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // Full 1x4 (m x n) problem with ks(3), for k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28145
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // ks(3) with k = 1, 10, ..., 37 crossed with n in 1..4 and m in 1..1, iterations(1) each.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28165
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // ks(3) with n in 5..7 crossed with k = 1, 10, ..., 37; m fixed at 1.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28182
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // ks(3) with n = 8, 12 crossed with k = 1, 10, ..., 37; m fixed at 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28199
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // cm_stride(7) with k = 1, 10, ..., 37 crossed with n in 1..4 and m in 1..1, iterations(1) each.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28219
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  // ks(3) and a_offset(43) on a full 1x4 (m x n) problem, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28235
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  // ks(3), a_offset(43) and zero_index in 0..0, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28254
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single 1x4x8 (m x n x k) run with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
28267
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single 1x4x8 (m x n x k) run with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
28280
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single 1x4x8 (m x n x k) run with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
28293
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  // a_zero_point(0) on a full 1x4 (m x n) problem, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28308
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  // b_zero_point(0) on a full 1x4 (m x n) problem, for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28323
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  // Both a_zero_point(0) and b_zero_point(0), for k = 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28339 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28340
28341
28342 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  // Single 2x4x8 (m x n x k) run: m = mr, n = nr, k = 8.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
28354
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  // Single 2x4x8 (m x n x k) run with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
28367
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // k fixed at 8; every n in 1..4 crossed with m in 1..2, iterations(1) each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
28384
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // k = 8 and n = 4 fixed; m in 1..2, iterations(1) each.
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28399
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // k = 8 and m = 2 fixed; n in 1..4, iterations(1) each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28414
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // Full 2x4 (m x n) problem with k stepping through 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28428
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // k in 1..7 crossed with every n in 1..4 and m in 1..2, iterations(1) each.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28447
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // Full 2x4 (m x n) problem with k stepping through 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28461
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // k in 9..15 crossed with every n in 1..4 and m in 1..2, iterations(1) each.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28480
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // Full 2x4 (m x n) problem with k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
28494
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with every n in 1..4 and m in 1..2, iterations(1) each.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
28513
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // Covers n in 5..7 (above nr = 4) combined with k in {1, 10, 19, 28, 37}, m = 2.
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28529
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // Covers n in 5..7 and k in {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28546
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // Covers n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..2, using
  // iterations(1) for each configuration.
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28565
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // Covers n in {8, 12} (multiples of nr = 4) with k in {1, 10, 19, 28, 37}, m = 2.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28581
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // Covers n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28598
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // Covers n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..2, using
  // iterations(1) for each configuration.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28617
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and ks set to 3.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28632
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // With ks set to 3, covers k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..2,
  // using iterations(1) for each configuration.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28652
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // Covers n in 5..7 and k in {1, 10, 19, 28, 37} with m = 2 and ks set to 3.
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28669
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // Covers n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 2 and ks set to 3.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28686
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // With cm_stride(7), covers k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..2,
  // using iterations(1) for each configuration.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28706
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4, ks set to 3 and a_offset(83).
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28722
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  // For k in {1, 10, 19, 28, 37}, runs with ks set to 3 and a_offset(83) while
  // stepping zero_index through 0 and 1.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28741
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single run with m = 2, n = 4, k = 8 and qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28754
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single run with m = 2, n = 4, k = 8 and qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28767
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single run with m = 2, n = 4, k = 8 and cm_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28780
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and a_zero_point set to 0.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28795
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and b_zero_point set to 0.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28810
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and both a_zero_point and
  // b_zero_point set to 0.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28826 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28827
28828
28829 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run with m = mr = 2, n = nr = 4 and k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28841
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run with m = 2, n = 4, k = 8 and cn_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28854
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // With k = 8, covers every (m, n) pair with n in 1..4 and m in 1..2, using
  // iterations(1) for each configuration.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28871
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // With n = 4 and k = 8, varies m over 1..2, using iterations(1) per run.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28886
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // With m = 2 and k = 8, varies n over 1..4, using iterations(1) per run.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28901
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Runs the kernel for every k from 1 through 7 with m = 2 and n = 4.
  for (size_t kc = 1; kc <= 7; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28915
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // For each k from 1 through 7, covers every (m, n) pair with m in 1..2 and
  // n in 1..4, using iterations(1) for each configuration.
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28934
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Runs the kernel for every k from 9 through 15 with m = 2 and n = 4.
  for (size_t kc = 9; kc <= 15; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28948
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // For each k from 9 through 15, covers every (m, n) pair with m in 1..2 and
  // n in 1..4, using iterations(1) for each configuration.
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28967
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Runs the kernel for k = 16, 24, ..., 80 (multiples of 8) with m = 2, n = 4.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28981
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // For k = 16, 24, ..., 80, covers every (m, n) pair with m in 1..2 and
  // n in 1..4, using iterations(1) for each configuration.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29000
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // Covers n in 5..7 (above nr = 4) combined with k in {1, 10, 19, 28, 37}, m = 2.
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29016
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // Covers n in 5..7 and k in {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29033
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // Covers n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..2, using
  // iterations(1) for each configuration.
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29052
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // Covers n in {8, 12} (multiples of nr = 4) with k in {1, 10, 19, 28, 37}, m = 2.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29068
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // Covers n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29085
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // Covers n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..2, using
  // iterations(1) for each configuration.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29104
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and ks set to 3.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29119
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // With ks set to 3, covers k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..2,
  // using iterations(1) for each configuration.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29139
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // Covers n in 5..7 and k in {1, 10, 19, 28, 37} with m = 2 and ks set to 3.
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29156
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // Covers n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 2 and ks set to 3.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29173
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // With cm_stride(7), covers k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..2,
  // using iterations(1) for each configuration.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29193
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4, ks set to 3 and a_offset(83).
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29209
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  // For k in {1, 10, 19, 28, 37}, runs with ks set to 3 and a_offset(83) while
  // stepping zero_index through 0 and 1.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29228
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single run with m = 2, n = 4, k = 8 and qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29241
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single run with m = 2, n = 4, k = 8 and qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29254
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single run with m = 2, n = 4, k = 8 and cm_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29267
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and a_zero_point set to 0.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29282
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and b_zero_point set to 0.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29297
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  // Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and both a_zero_point and
  // b_zero_point set to 0.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29313 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29314
29315
29316 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run with m = mr = 2, n = nr = 4 and k = kr = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29328
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run with m = 2, n = 4, k = 8 and cn_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29341
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // With k = 8, covers every (m, n) pair with n in 1..4 and m in 1..2, using
  // iterations(1) for each configuration.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29358
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // With n = 4 and k = 8, varies m over 1..2, using iterations(1) per run.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29373
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // With m = 2 and k = 8, varies n over 1..4, using iterations(1) per run.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29388
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Runs the kernel for every k from 1 through 7 with m = 2 and n = 4.
  for (size_t kc = 1; kc <= 7; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29402
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // For each k from 1 through 7, covers every (m, n) pair with m in 1..2 and
  // n in 1..4, using iterations(1) for each configuration.
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29421
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Runs the kernel for every k from 9 through 15 with m = 2 and n = 4.
  for (size_t kc = 9; kc <= 15; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29435
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // For each k from 9 through 15, covers every (m, n) pair with m in 1..2 and
  // n in 1..4, using iterations(1) for each configuration.
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29454
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Runs the kernel for k = 16, 24, ..., 80 (multiples of 8) with m = 2, n = 4.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29468
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // For k = 16, 24, ..., 80, covers every (m, n) pair with m in 1..2 and
  // n in 1..4, using iterations(1) for each configuration.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29487
// Runs m = 2 with n in 5..7, each against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29503
// Same sweep as n_gt_4 (n in 5..7, k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29520
// For n in 5..7 and k = 1, 10, ..., 37, runs each m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29539
// Runs m = 2 with n = 8 and n = 12, each against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29555
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29572
// For n = 8, 12 and k = 1, 10, ..., 37, runs each m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29591
// Runs m = 2, n = 4 with ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29606
// With ks set to 3 and k = 1, 10, ..., 37, runs each (m, n) with m in 1..2
// and n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29626
// With ks set to 3, runs m = 2 with n in 5..7 against k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29643
// With ks set to 3, runs m = 2 with n = 8, 12 against k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29660
// With cm_stride set to 7 and k = 1, 10, ..., 37, runs each (m, n) with
// m in 1..2 and n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29680
// Runs m = 2, n = 4 with ks set to 3 and a_offset set to 83
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29696
// With ks set to 3 and a_offset set to 83, runs each zero_index in 0..1
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t mz_idx = 0; mz_idx < 2; ++mz_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(mz_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29715
// Runs a single m = 2, n = 4, k = 8 case with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29728
// Runs a single m = 2, n = 4, k = 8 case with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29741
// Runs a single m = 2, n = 4, k = 8 case with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29754
// Runs m = 2, n = 4 with a_zero_point set to 0 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29769
// Runs m = 2, n = 4 with b_zero_point set to 0 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29784
// Runs m = 2, n = 4 with both a_zero_point and b_zero_point set to 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29800 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29801
29802
29803 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs a single m = 3, n = 4, k = 8 case on the 3x4c2 kernel.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29815
// Runs a single m = 3, n = 4, k = 8 case with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29828
// With k = 8, runs each (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29845
// With n = 4 and k = 8, runs each m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29860
// With m = 3 and k = 8, runs each n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29875
// Runs m = 3, n = 4 for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29889
// For every k in 1..7, runs each (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29908
// Runs m = 3, n = 4 for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29922
// For every k in 9..15, runs each (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29941
// Runs m = 3, n = 4 for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29955
// For k = 16, 24, ..., 80, runs each (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29974
// Runs m = 3 with n in 5..7, each against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29990
// Same sweep as n_gt_4 (n in 5..7, k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30007
// For n in 5..7 and k = 1, 10, ..., 37, runs each m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30026
// Runs m = 3 with n = 8 and n = 12, each against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30042
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30059
// For n = 8, 12 and k = 1, 10, ..., 37, runs each m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30078
// Runs m = 3, n = 4 with ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30093
// With ks set to 3 and k = 1, 10, ..., 37, runs each (m, n) with m in 1..3
// and n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30113
// With ks set to 3, runs m = 3 with n in 5..7 against k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30130
// With ks set to 3, runs m = 3 with n = 8, 12 against k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30147
// With cm_stride set to 7 and k = 1, 10, ..., 37, runs each (m, n) with
// m in 1..3 and n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30167
// Runs m = 3, n = 4 with ks set to 3 and a_offset set to 127
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30183
// With ks set to 3 and a_offset set to 127, runs each zero_index in 0..2
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t mz_idx = 0; mz_idx < 3; ++mz_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(kc);
      tester.ks(3).a_offset(127).zero_index(mz_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30202
// Runs a single m = 3, n = 4, k = 8 case with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30215
// Runs a single m = 3, n = 4, k = 8 case with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30228
// Runs a single m = 3, n = 4, k = 8 case with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30241
// Runs m = 3, n = 4 with a_zero_point set to 0 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30256
// Runs m = 3, n = 4 with b_zero_point set to 0 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30271
// Runs m = 3, n = 4 with both a_zero_point and b_zero_point set to 0
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30287 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30288
30289
30290 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs a single m = 3, n = 4, k = 8 case on the 3x4c2s4 kernel (sr = 4).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30302
// Runs a single m = 3, n = 4, k = 8 case with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
30315
// With k = 8, runs each (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30332
// With n = 4 and k = 8, runs each m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30347
// With m = 3 and k = 8, runs each n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30362
// Runs m = 3, n = 4 for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30376
// For every k in 1..7, runs each (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30395
// Runs m = 3, n = 4 for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30409
// For every k in 9..15, runs each (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30428
// Runs m = 3, n = 4 for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
30442
// For k = 16, 24, ..., 80, runs each (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30461
// n in 5..7 (above nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30477
// n in 5..7 with m = 3 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30494
// n in 5..7 crossed with k = 1, 10, 19, 28, 37 and m in 1..3; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30513
// n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30529
// n = 8 and 12 with m = 3 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30546
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..3; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30565
// Full 3x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30580
// Every (m, n) with m in 1..3 and n in 1..4 with ks(3), for k = 1, 10, 19, 28, 37; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30600
// n in 5..7 with m = 3 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30617
// n = 8 and 12 with m = 3 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30634
// Every (m, n) with m in 1..3 and n in 1..4 with cm_stride(7), for k = 1, 10, 19, 28, 37; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30654
// Full 3x4 tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30670
// Full 3x4 tile with ks(3) and a_offset(127), stepping zero_index over 0..2 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30689
// Full 3x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30702
// Full 3x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30715
// Full 3x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30728
// Full 3x4 tile with a_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30743
// Full 3x4 tile with b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30758
// Full 3x4 tile with both a_zero_point(0) and b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30774 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30775
30776
30777 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 3x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30789
// Full 3x4 tile at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
30802
// Every (m, n) with m in 1..3 and n in 1..4 at k = 8; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30819
// m in 1..3 with n = 4 at k = 8; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30834
// n in 1..4 with m = 3 at k = 8; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30849
// Full 3x4 tile with k swept over 1..7 (every value below 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30863
// Every (m, n) with m in 1..3 and n in 1..4, for k in 1..7; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30882
// Full 3x4 tile with k swept over 9..15 (every value strictly between 8 and 16).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30896
// Every (m, n) with m in 1..3 and n in 1..4, for k in 9..15; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30915
// Full 3x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
30929
// Every (m, n) with m in 1..3 and n in 1..4, for k = 16, 24, ..., 80; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
30948
// n in 5..7 (above nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30964
// n in 5..7 with m = 3 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
30981
// n in 5..7 crossed with k = 1, 10, 19, 28, 37 and m in 1..3; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31000
// n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31016
// n = 8 and 12 with m = 3 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31033
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..3; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31052
// Full 3x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31067
// Every (m, n) with m in 1..3 and n in 1..4 with ks(3), for k = 1, 10, 19, 28, 37; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31087
// n in 5..7 with m = 3 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31104
// n = 8 and 12 with m = 3 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31121
// Every (m, n) with m in 1..3 and n in 1..4 with cm_stride(7), for k = 1, 10, 19, 28, 37; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31141
// Full 3x4 tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31157
// Full 3x4 tile with ks(3) and a_offset(127), stepping zero_index over 0..2 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31176
// Full 3x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31189
// Full 3x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31202
// Full 3x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31215
// Full 3x4 tile with a_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31230
// Full 3x4 tile with b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31245
// Full 3x4 tile with both a_zero_point(0) and b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31261 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31262
31263
31264 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 3x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31276
// Full 3x4 tile at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31289
// Every (m, n) with m in 1..3 and n in 1..4 at k = 8; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31306
// m in 1..3 with n = 4 at k = 8; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31321
// n in 1..4 with m = 3 at k = 8; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31336
// Full 3x4 tile with k swept over 1..7 (every value below 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31350
// Every (m, n) with m in 1..3 and n in 1..4, for k in 1..7; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31369
// Full 3x4 tile with k swept over 9..15 (every value strictly between 8 and 16).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31383
// Every (m, n) with m in 1..3 and n in 1..4, for k in 9..15; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31402
// Full 3x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31416
// Every (m, n) with m in 1..3 and n in 1..4, for k = 16, 24, ..., 80; iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31435
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n in 5..7 with m = 3; k takes 1, 10, 19, 28, 37 for each n.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_value).k(k_value)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31451
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n in 5..7 with m = 3 and cn_stride = 7; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_value).k(k_value)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31468
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n in 5..7, k in 1, 10, ..., 37, and m in 1..3; one iteration per
  // configuration.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31487
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n takes 8 and 12 with m = 3; k takes 1, 10, 19, 28, 37 for each n.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_value).k(k_value)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31503
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // n takes 8 and 12 with m = 3 and cn_stride = 7; k takes 1, 10, ..., 37.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_value).k(k_value)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31520
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n takes 8 and 12, k takes 1, 10, ..., 37, and m runs over 1..3; one
  // iteration per configuration.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31539
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // Full tile (m = 3, n = 4) with ks = 3; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_value)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31554
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // ks = 3 with k in 1, 10, ..., 37, n in 1..4 and m in 1..3; one iteration
  // per configuration.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .ks(3).iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31574
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // n in 5..7 with m = 3 and ks = 3; k takes 1, 10, 19, 28, 37 for each n.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_value).k(k_value)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31591
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // n takes 8 and 12 with m = 3 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_value).k(k_value)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31608
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // cm_stride = 7 with k in 1, 10, ..., 37, n in 1..4 and m in 1..3; one
  // iteration per configuration.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .cm_stride(7).iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31628
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  // Full tile (m = 3, n = 4) with ks = 3 and a_offset = 127; k takes
  // 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_value)
        .ks(3).a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31644
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  // Full tile with ks = 3 and a_offset = 127; for each k in 1, 10, ..., 37
  // the zero_index is set to each of 0, 1, 2 in turn.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_value)
          .ks(3).a_offset(127).zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31663
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single run: full tile (m = 3, n = 4), k = 8, with qmin set to 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31676
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single run: full tile (m = 3, n = 4), k = 8, with qmax set to 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31689
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single run: full tile (m = 3, n = 4), k = 8, with cm_stride set to 7.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31702
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  // Full tile (m = 3, n = 4) with a_zero_point = 0; k takes 1, 10, ..., 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_value)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31717
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  // Full tile (m = 3, n = 4) with b_zero_point = 0; k takes 1, 10, ..., 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_value)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31732
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  // Full tile (m = 3, n = 4) with both a_zero_point and b_zero_point set to
  // 0; k takes 1, 10, ..., 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_value)
        .a_zero_point(0).b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31748 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31749
31750
31751 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  // Single run: full tile (m = 4, n = 4) with k = 8.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31763
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  // Single run: full tile (m = 4, n = 4), k = 8, with cn_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
31776
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // k = 8 with every (n, m) pair for n in 1..4 and m in 1..4; one iteration
  // per configuration.
  for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
    for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_value).n(n_value).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31793
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // k = 8 and n = 4 with m running over 1..4; one iteration per m.
  for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_value).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31808
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // k = 8 and m = 4 with n running over 1..4; one iteration per n.
  for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_value).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31823
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // Full tile (m = 4, n = 4); k takes every value from 1 through 7.
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_value)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31837
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // For each k in 1..7, runs every (n, m) pair with n in 1..4 and m in 1..4,
  // using a single iteration per configuration.
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31856
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // Full tile (m = 4, n = 4); k takes every value from 9 through 15.
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_value)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31870
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // For each k in 9..15, runs every (n, m) pair with n in 1..4 and m in 1..4,
  // using a single iteration per configuration.
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31889
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // Full tile (m = 4, n = 4); k steps through 16, 24, ..., 80.
  for (size_t k_value = 16; k_value <= 80; k_value += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_value)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31903
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // For each k in 16, 24, ..., 80, runs every (n, m) pair with n in 1..4 and
  // m in 1..4, using a single iteration per configuration.
  for (size_t k_value = 16; k_value <= 80; k_value += 8) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31922
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // n in 5..7 with m = 4; k takes 1, 10, 19, 28, 37 for each n.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_value).k(k_value)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31938
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // n in 5..7 with m = 4 and cn_stride = 7; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_value).k(k_value)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31955
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n in 5..7, k in 1, 10, ..., 37, and m in 1..4; one iteration per
  // configuration.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31974
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n takes 8 and 12 with m = 4; k takes 1, 10, 19, 28, 37 for each n.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_value).k(k_value)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31990
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // n takes 8 and 12 with m = 4 and cn_stride = 7; k takes 1, 10, ..., 37.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_value).k(k_value)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32007
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n takes 8 and 12, k takes 1, 10, ..., 37, and m runs over 1..4; one
  // iteration per configuration.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32026
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // Full tile (m = 4, n = 4) with ks = 3; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_value)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32041
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // ks = 3 with k in 1, 10, ..., 37, n in 1..4 and m in 1..4; one iteration
  // per configuration.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .ks(3).iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32061
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // n in 5..7 with m = 4 and ks = 3; k takes 1, 10, 19, 28, 37 for each n.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_value).k(k_value)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32078
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // n takes 8 and 12 with m = 4 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_value).k(k_value)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32095
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // cm_stride = 7 with k in 1, 10, ..., 37, n in 1..4 and m in 1..4; one
  // iteration per configuration.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .cm_stride(7).iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32115
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  // Full tile (m = 4, n = 4) with ks = 3 and a_offset = 163; k takes
  // 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_value)
        .ks(3).a_offset(163)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32131
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  // Full tile with ks = 3 and a_offset = 163; for each k in 1, 10, ..., 37
  // the zero_index is set to each of 0, 1, 2, 3 in turn.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_value)
          .ks(3).a_offset(163).zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32150
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single run: full tile (m = 4, n = 4), k = 8, with qmin set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32163
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single run: full tile (m = 4, n = 4), k = 8, with qmax set to 128.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32176
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single run: full tile (m = 4, n = 4), k = 8, with cm_stride set to 7.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32189
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  // Full tile (m = 4, n = 4) with a_zero_point = 0; k takes 1, 10, ..., 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_value)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32204
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  // Full tile (m = 4, n = 4) with b_zero_point = 0; k takes 1, 10, ..., 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_value)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32219
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  // Full tile (m = 4, n = 4) with both a_zero_point and b_zero_point set to
  // 0; k takes 1, 10, ..., 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_value)
        .a_zero_point(0).b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32235 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32236
32237
32238 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1) {
  // Single run: full tile (m = 3, n = 2) with k = 1.
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32250
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cn) {
  // Single run: full tile (m = 3, n = 2), k = 1, with cn_stride set to 5.
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32263
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile) {
  // k = 1 with every (n, m) pair for n in 1..2 and m in 1..3; one iteration
  // per configuration.
  for (uint32_t n_value = 1; n_value <= 2; ++n_value) {
    for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_value).n(n_value).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32280
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  // k = 1 and n = 2 with m running over 1..3; one iteration per m.
  for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_value).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32295
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  // k = 1 and m = 3 with n running over 1..2; one iteration per n.
  for (uint32_t n_value = 1; n_value <= 2; ++n_value) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_value).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32310
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1) {
  // Full tile (m = 3, n = 2); k takes every value from 2 through 9.
  for (size_t k_value = 2; k_value <= 9; ++k_value) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_value)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32324
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1_subtile) {
  // For each k in 2..9, runs every (n, m) pair with n in 1..2 and m in 1..3,
  // using a single iteration per configuration.
  for (size_t k_value = 2; k_value <= 9; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 2; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32343
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2) {
  // The n loop covers the single value 3, with m = 3; k takes 1, 3, 5.
  for (uint32_t n_value = 3; n_value <= 3; ++n_value) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_value).k(k_value)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32359
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  // The n loop covers the single value 3, with m = 3 and cn_stride = 5;
  // k takes 1, 3, 5.
  for (uint32_t n_value = 3; n_value <= 3; ++n_value) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_value).k(k_value)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32376
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_subtile) {
  // The n loop covers the single value 3; k takes 1, 3, 5 and m runs over
  // 1..3, with one iteration per configuration.
  for (uint32_t n_value = 3; n_value <= 3; ++n_value) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_value).n(n_value).k(k_value)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32395
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2) {
  // n takes 4 and 6 with m = 3; k takes 1, 3, 5 for each n.
  for (uint32_t n_value = 4; n_value <= 6; n_value += 2) {
    for (size_t k_value = 1; k_value <= 5; k_value += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_value).k(k_value)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32411
// m = 3, n in {4, 6}, k swept over {1, 3, 5}, with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32428
// n in {4, 6}, k in {1, 3, 5}, every m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
32447
// Full 3x2 tile with ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(kc)
        .ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32462
// ks = 3 over k in {1, 3, 5}, n in [1, 2], m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
32482
// m = 3, n = 3, ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32499
// m = 3, n in {4, 6}, ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32516
// cm_stride = 5 over k in {1, 3, 5}, n in [1, 2], m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
32536
// Full 3x2 tile with ks = 3 and a_offset = 17, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(kc)
        .ks(3)
        .a_offset(17);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32552
// ks = 3, a_offset = 17, with zero_index taking each value in [0, 2] for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(2).k(kc)
          .ks(3)
          .a_offset(17)
          .zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32571
// Full 3x2 tile at k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmin) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
32584
// Full 3x2 tile at k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
32597
// Full 3x2 tile at k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cm_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
32610
// Full 3x2 tile with a_zero_point = 0, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(kc)
        .a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32625
// Full 3x2 tile with b_zero_point = 0, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(kc)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32640
// Full 3x2 tile with both a_zero_point and b_zero_point = 0, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(kc)
        .a_zero_point(0)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32656 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32657
32658
32659 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 3x4 tile at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
32671
// Full 3x4 tile at k = 1 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
32684
// k = 1 over every n in [1, 4] and m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32701
// k = 1, n = 4, every m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32716
// k = 1, m = 3, every n in [1, 4]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(1)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32731
// Full 3x4 tile for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32745
// Every k in [2, 9], n in [1, 4], m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
32764
// m = 3, n in [5, 7] (above nr = 4), k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32780
// m = 3, n in [5, 7], k swept over {1, 3, 5}, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32797
// n in [5, 7], k in {1, 3, 5}, every m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
32816
// m = 3, n in {8, 12} (multiples of nr = 4), k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32832
// m = 3, n in {8, 12}, k swept over {1, 3, 5}, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32849
// n in {8, 12}, k in {1, 3, 5}, every m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
32868
// Full 3x4 tile with ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32883
// ks = 3 over k in {1, 3, 5}, n in [1, 4], m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
32903
// m = 3, n in [5, 7], ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32920
// m = 3, n in {8, 12}, ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32937
// cm_stride = 7 over k in {1, 3, 5}, n in [1, 4], m in [1, 3]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
32957
// Full 3x4 tile with ks = 3 and a_offset = 17, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(17);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
32973
// ks = 3, a_offset = 17, with zero_index taking each value in [0, 2] for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(17)
          .zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32992
// Full 3x4 tile at k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmin) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
33005
// Full 3x4 tile at k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
33018
// Full 3x4 tile at k = 1 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
33031
// Full 3x4 tile with a_zero_point = 0, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33046
// Full 3x4 tile with b_zero_point = 0, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33061
// Full 3x4 tile with both a_zero_point and b_zero_point = 0, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33077 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33078
33079
// Full 1x2 tile at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33091
// Full 1x2 tile at k = 1 with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33104
// k = 1 over every n in [1, 2] and m in [1, 1]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33121
// k = 1, n = 2, every m in [1, 1]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(1)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33136
// k = 1, m = 1, every n in [1, 2]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33151
// Full 1x2 tile for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33165
// Every k in [2, 9], n in [1, 2], m in [1, 1]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33184
// m = 1, n = 3 (just above nr = 2), k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33200
// m = 1, n = 3, k swept over {1, 3, 5}, with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33217
// n = 3, k in {1, 3, 5}, every m in [1, 1]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33236
// m = 1, n in {4, 6} (multiples of nr = 2), k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33252
// m = 1, n in {4, 6}, k swept over {1, 3, 5}, with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33269
// n in {4, 6}, k in {1, 3, 5}, every m in [1, 1]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33288
// Full 1x2 tile with ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(kc)
        .ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33303
// ks = 3 over k in {1, 3, 5}, n in [1, 2], m in [1, 1]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33323
// m = 1, n = 3, ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33340
// m = 1, n in {4, 6}, ks = 3, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33357
// cm_stride = 5 over k in {1, 3, 5}, n in [1, 2], m in [1, 1]; one tester iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33377
// Full 1x2 tile with ks = 3 and a_offset = 7, k swept over {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(kc)
        .ks(3)
        .a_offset(7);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33393
// ks = 3, a_offset = 7, with zero_index = 0 (the only value below 1) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(2).k(kc)
          .ks(3)
          .a_offset(7)
          .zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33412
// Full 1x2 tile at k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33425
// Full 1x2 tile at k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33438
// Full 1x2 tile at k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33451
// Full 1x2 tile with a_zero_point(0); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33466
// Full 1x2 tile with b_zero_point(0); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33481
// Full 1x2 tile with both a_zero_point(0) and b_zero_point(0); k takes the
// values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33497
33498
// Single run on the full 1x4 tile at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33510
// Single run on the full 1x4 tile at k = 1 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33523
// Every (m, n) with m in [1, 1] and n in [1, 4] at k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33540
// m swept over [1, 1] with n = 4 and k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(mc).n(4).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33555
// n swept over [1, 4] with m = 1 and k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(nc).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33570
// Full 1x4 tile; k swept over 2 through 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33584
// k swept over 2 through 9; for each k every (m, n) with m in [1, 1] and
// n in [1, 4] is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33603
// n swept over 5 through 7 with m = 1; k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33619
// n swept over 5 through 7 with m = 1 and cn_stride(7); k takes the values
// 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33636
// n swept over 5 through 7, k over 1, 3, 5 and m over [1, 1]; each
// combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33655
// n takes the values 8 and 12 with m = 1; k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33671
// n takes the values 8 and 12 with m = 1 and cn_stride(7); k takes the
// values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33688
// n takes the values 8 and 12, k the values 1, 3, 5 and m the range [1, 1];
// each combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33707
// Full 1x4 tile with ks(3); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33722
// ks(3) with k in 1, 3, 5; every (m, n) with m in [1, 1] and n in [1, 4] is
// run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33742
// n swept over 5 through 7 with m = 1 and ks(3); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33759
// n takes the values 8 and 12 with m = 1 and ks(3); k takes the values
// 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33776
// cm_stride(7) with k in 1, 3, 5; every (m, n) with m in [1, 1] and
// n in [1, 4] is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33796
// Full 1x4 tile with ks(3) and a_offset(7); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33812
// Full 1x4 tile with ks(3) and a_offset(7); for each k in 1, 3, 5 every
// zero_index below mr (here only 0) is exercised.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(7)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33831
// Single run on the full 1x4 tile at k = 1 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33844
// Single run on the full 1x4 tile at k = 1 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33857
// Single run on the full 1x4 tile at k = 1 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33870
// Full 1x4 tile with a_zero_point(0); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33885
// Full 1x4 tile with b_zero_point(0); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33900
// Full 1x4 tile with both a_zero_point(0) and b_zero_point(0); k takes the
// values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33916
33917
// Single run on the full 2x2 tile at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33929
// Single run on the full 2x2 tile at k = 1 with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33942
// Every (m, n) with m in [1, 2] and n in [1, 2] at k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33959
// m swept over [1, 2] with n = 2 and k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(mc).n(2).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33974
// n swept over [1, 2] with m = 2 and k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(nc).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33989
// Full 2x2 tile; k swept over 2 through 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34003
// k swept over 2 through 9; for each k every (m, n) with m in [1, 2] and
// n in [1, 2] is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34022
// n = 3 (the only value in [3, 4)) with m = 2; k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34038
// n = 3 (the only value in [3, 4)) with m = 2 and cn_stride(5); k takes the
// values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34055
// n = 3 (the only value in [3, 4)), k in 1, 3, 5 and m in [1, 2]; each
// combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34074
// n takes the values 4 and 6 with m = 2; k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34090
// n takes the values 4 and 6 with m = 2 and cn_stride(5); k takes the
// values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34107
// n takes the values 4 and 6, k the values 1, 3, 5 and m the range [1, 2];
// each combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34126
// Full 2x2 tile with ks(3); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34141
// ks(3) with k in 1, 3, 5; every (m, n) with m in [1, 2] and n in [1, 2] is
// run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34161
// n = 3 (the only value in [3, 4)) with m = 2 and ks(3); k takes the values
// 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34178
// n takes the values 4 and 6 with m = 2 and ks(3); k takes the values
// 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34195
// cm_stride(5) with k in 1, 3, 5; every (m, n) with m in [1, 2] and
// n in [1, 2] is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34215
// Full 2x2 tile with ks(3) and a_offset(13); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34231
// Full 2x2 tile with ks(3) and a_offset(13); for each k in 1, 3, 5 the
// zero_index values 0 and 1 are exercised.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .ks(3)
        .a_offset(13)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34250
// Single run on the full 2x2 tile at k = 1 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
34263
// Single run on the full 2x2 tile at k = 1 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
34276
// Single run on the full 2x2 tile at k = 1 with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
34289
// Full 2x2 tile with a_zero_point(0); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34304
// Full 2x2 tile with b_zero_point(0); k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34319
// Full 2x2 tile with both a_zero_point(0) and b_zero_point(0); k takes the
// values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34335
34336
// Single run on the full 2x4 tile at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
34348
// Single run on the full 2x4 tile at k = 1 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
34361
// Every (m, n) with m in [1, 2] and n in [1, 4] at k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34378
// m swept over [1, 2] with n = 4 and k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(mc).n(4).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34393
// n swept over [1, 4] with m = 2 and k = 1, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(nc).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34408
// Full 2x4 tile; k swept over 2 through 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34422
// k swept over 2 through 9; for each k every (m, n) with m in [1, 2] and
// n in [1, 4] is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34441
// n swept over 5 through 7 with m = 2; k takes the values 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34457
// n swept over 5 through 7 with m = 2 and cn_stride(7); k takes the values
// 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34474
// n in 5..7, k in {1, 3, 5}, m in 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34493
// n in {8, 12} (multiples of nr = 4) with m = 2 and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34509
// n in {8, 12} with m = 2, k in {1, 3, 5} and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34526
// n in {8, 12}, k in {1, 3, 5}, m in 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34545
// m = 2, n = 4, ks = 3; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34560
// ks = 3 with k in {1, 3, 5}, n in 1..4 and m in 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34580
// n in 5..7 with m = 2, k in {1, 3, 5} and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34597
// n in {8, 12} with m = 2, k in {1, 3, 5} and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34614
// cm_stride = 7 with k in {1, 3, 5}, n in 1..4 and m in 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34634
// m = 2, n = 4, ks = 3 and a_offset = 13; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .a_offset(13)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34650
// ks = 3 and a_offset = 13 with zero_index taking 0 and 1; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(13)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34669
// m = 2, n = 4, k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
34682
// m = 2, n = 4, k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
34695
// m = 2, n = 4, k = 1 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
34708
// m = 2, n = 4 with a_zero_point set to 0; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34723
// m = 2, n = 4 with b_zero_point set to 0; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34738
// m = 2, n = 4 with both a_zero_point and b_zero_point set to 0; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34754
34755
// m = 3, n = 2 (equal to mr and nr) with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
34767
// m = 3, n = 2, k = 1 with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
34780
// k = 1 with every n in 1..2 and m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34797
// k = 1, n = 2 with m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(m_val).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34812
// k = 1, m = 3 with n in 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(n_val).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34827
// m = 3, n = 2; k takes every value from 2 through 9.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34841
// k in 2..9 crossed with every n in 1..2 and m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34860
// n = 3 (the only value in the loop range, above nr = 2) with m = 3 and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34876
// n = 3 with m = 3, k in {1, 3, 5} and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34893
// n = 3, k in {1, 3, 5}, m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34912
// n in {4, 6} (multiples of nr = 2) with m = 3 and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34928
// n in {4, 6} with m = 3, k in {1, 3, 5} and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34945
// n in {4, 6}, k in {1, 3, 5}, m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34964
// m = 3, n = 2, ks = 3; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34979
// ks = 3 with k in {1, 3, 5}, n in 1..2 and m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34999
// n = 3 with m = 3, k in {1, 3, 5} and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35016
// n in {4, 6} with m = 3, k in {1, 3, 5} and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35033
// cm_stride = 5 with k in {1, 3, 5}, n in 1..2 and m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35053
// m = 3, n = 2, ks = 3 and a_offset = 17; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .ks(3)
      .a_offset(17)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35069
// ks = 3 and a_offset = 17 with zero_index taking 0, 1 and 2; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_val)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35088
// m = 3, n = 2, k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35101
// m = 3, n = 2, k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35114
// m = 3, n = 2, k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
35127
// m = 3, n = 2 with a_zero_point set to 0; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35142
// m = 3, n = 2 with b_zero_point set to 0; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35157
// m = 3, n = 2 with both a_zero_point and b_zero_point set to 0; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
35173
35174
// m = 3, n = 2 (equal to mr and nr) with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35186
// m = 3, n = 2, k = 1 with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
35199
// k = 1 with every n in 1..2 and m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35216
// k = 1, n = 2 with m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(m_val).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35231
// k = 1, m = 3 with n in 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(n_val).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35246
// m = 3, n = 2; k takes every value from 2 through 9.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35260
// k in 2..9 crossed with every n in 1..2 and m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35279
// n = 3 (the only value in the loop range, above nr = 2) with m = 3 and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35295
// n = 3 with m = 3, k in {1, 3, 5} and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35312
// n = 3, k in {1, 3, 5}, m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35331
// n in {4, 6} (multiples of nr = 2) with m = 3 and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35347
// n in {4, 6} with m = 3, k in {1, 3, 5} and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35364
// n in {4, 6}, k in {1, 3, 5}, m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35383
// m = 3, n = 2, ks = 3; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35398
// ks = 3 with k in {1, 3, 5}, n in 1..2 and m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35418
// n = 3 with m = 3, k in {1, 3, 5} and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35435
// n in {4, 6} with m = 3, k in {1, 3, 5} and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35452
// cm_stride = 5 with k in {1, 3, 5}, n in 1..2 and m in 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35472
// m = 3, n = 2, ks = 3 and a_offset = 17; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .ks(3)
      .a_offset(17)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
35488
// ks = 3 and a_offset = 17 with zero_index taking 0, 1 and 2; k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_val)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35507
// qmin: full 3x2 tile, k = 1, with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
35520
// qmax: full 3x2 tile, k = 1, with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
35533
// strided_cm: full 3x2 tile, k = 1, with cm_stride = 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
35546
// no_a_zero_point: kc in {1, 3, 5} on the full 3x2 tile, with a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
35561
// no_b_zero_point: kc in {1, 3, 5} on the full 3x2 tile, with b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
35576
// no_zero_point: kc in {1, 3, 5} on the full 3x2 tile, with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
35592
35593
// k_eq_1: full 3x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
35605
// strided_cn: full 3x4 tile, k = 1, with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
35618
// k_eq_1_subtile: nc in 1..4 x mc in 1..3 with k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
35635
// k_eq_1_subtile_m: mc in 1..3 with n = 4 and k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
35650
// k_eq_1_subtile_n: nc in 1..4 with m = 3 and k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
35665
// k_gt_1: kc in 2..9 on the full 3x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
35679
// k_gt_1_subtile: kc in 2..9 x nc in 1..4 x mc in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
35698
// n_gt_4: nc in 5..7 x kc in {1, 3, 5} with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
35714
// n_gt_4_strided_cn: nc in 5..7 x kc in {1, 3, 5} with m = 3 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
35731
// n_gt_4_subtile: nc in 5..7 x kc in {1, 3, 5} x mc in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
35750
// n_div_4: nc in {8, 12} x kc in {1, 3, 5} with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
35766
// n_div_4_strided_cn: nc in {8, 12} x kc in {1, 3, 5} with m = 3 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
35783
// n_div_4_subtile: nc in {8, 12} x kc in {1, 3, 5} x mc in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
35802
// small_kernel: kc in {1, 3, 5} on the full 3x4 tile, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
35817
// small_kernel_subtile: kc in {1, 3, 5} x nc in 1..4 x mc in 1..3, with ks = 3,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
35837
// n_gt_4_small_kernel: nc in 5..7 x kc in {1, 3, 5} with m = 3 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
35854
// n_div_4_small_kernel: nc in {8, 12} x kc in {1, 3, 5} with m = 3 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
35871
// strided_cm_subtile: kc in {1, 3, 5} x nc in 1..4 x mc in 1..3, with
// cm_stride = 7, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
35891
// a_offset: kc in {1, 3, 5} on the full 3x4 tile, with ks = 3 and a_offset = 17.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(17)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
35907
// zero: kc in {1, 3, 5} x zero_index in 0..2, with ks = 3 and a_offset = 17.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
35926
// qmin: full 3x4 tile, k = 1, with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
35939
// qmax: full 3x4 tile, k = 1, with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
35952
// strided_cm: full 3x4 tile, k = 1, with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
35965
// no_a_zero_point: kc in {1, 3, 5} on the full 3x4 tile, with a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
35980
// no_b_zero_point: kc in {1, 3, 5} on the full 3x4 tile, with b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
35995
// no_zero_point: kc in {1, 3, 5} on the full 3x4 tile, with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36011
36012
// k_eq_1: full 3x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
36024
// strided_cn: full 3x4 tile, k = 1, with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
36037
// k_eq_1_subtile: nc in 1..4 x mc in 1..3 with k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36054
// k_eq_1_subtile_m: mc in 1..3 with n = 4 and k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
36069
// k_eq_1_subtile_n: nc in 1..4 with m = 3 and k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
36084
// k_gt_1: kc in 2..9 on the full 3x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
36098
// k_gt_1_subtile: kc in 2..9 x nc in 1..4 x mc in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36117
// n_gt_4: nc in 5..7 x kc in {1, 3, 5} with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36133
// n_gt_4_strided_cn: nc in 5..7 x kc in {1, 3, 5} with m = 3 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36150
// n_gt_4_subtile: nc in 5..7 x kc in {1, 3, 5} x mc in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36169
// n_div_4: nc in {8, 12} x kc in {1, 3, 5} with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36185
// n_div_4_strided_cn: nc in {8, 12} x kc in {1, 3, 5} with m = 3 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36202
// n_div_4_subtile: nc in {8, 12} x kc in {1, 3, 5} x mc in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36221
// small_kernel: kc in {1, 3, 5} on the full 3x4 tile, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
36236
// small_kernel_subtile: kc in {1, 3, 5} x nc in 1..4 x mc in 1..3, with ks = 3,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36256
// n_gt_4_small_kernel: nc in 5..7 x kc in {1, 3, 5} with m = 3 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36273
// n_div_4_small_kernel: nc in {8, 12} x kc in {1, 3, 5} with m = 3 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36290
// strided_cm_subtile: kc in {1, 3, 5} x nc in 1..4 x mc in 1..3, with
// cm_stride = 7, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                  xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36310
// a_offset: kc in {1, 3, 5} on the full 3x4 tile, with ks = 3 and a_offset = 17.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(17)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
36326
// zero: kc in {1, 3, 5} x zero_index in 0..2, with ks = 3 and a_offset = 17.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36345
// qmin: full 3x4 tile, k = 1, with qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
36358
// qmax: full 3x4 tile, k = 1, with qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
36371
// strided_cm: full 3x4 tile, k = 1, with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
36384
// no_a_zero_point: kc in {1, 3, 5} on the full 3x4 tile, with a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
36399
// no_b_zero_point: kc in {1, 3, 5} on the full 3x4 tile, with b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
36414
// no_zero_point: kc in {1, 3, 5} on the full 3x4 tile, with both a_zero_point
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
36430
36431
// k_eq_1: full 4x2 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
36443
// strided_cn: full 4x2 tile, k = 1, with cn_stride = 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
36456
// k_eq_1_subtile: nc in 1..2 x mc in 1..4 with k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36473
// k_eq_1_subtile_m: mc in 1..4 with n = 2 and k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36488
// k_eq_1_subtile_n: nc in 1..2 with m = 4 and k = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36503
// k_gt_1: kc in 2..9 on the full 4x2 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
36517
// k_gt_1_subtile: kc in 2..9 x nc in 1..2 x mc in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36536
// 4x2 scalar fmagic kernel: n in [3, 4) (i.e. n = 3) combined with
// k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36552
// 4x2 scalar fmagic kernel: n in [3, 4) (i.e. n = 3) combined with
// k in {1, 3, 5}, m = 4, and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(5);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36569
// 4x2 scalar fmagic kernel: n in [3, 4) (i.e. n = 3), k in {1, 3, 5},
// m in 1..4; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36588
// 4x2 scalar fmagic kernel: n in {4, 6} combined with k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36604
// 4x2 scalar fmagic kernel: n in {4, 6} combined with k in {1, 3, 5},
// m = 4, and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(5);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36621
// 4x2 scalar fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..4;
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36640
// 4x2 scalar fmagic kernel: k in {1, 3, 5} with m = 4, n = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36655
// 4x2 scalar fmagic kernel: k in {1, 3, 5}, n in 1..2, m in 1..4 with
// ks = 3; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36675
// 4x2 scalar fmagic kernel: n in [3, 4) (i.e. n = 3) combined with
// k in {1, 3, 5}, m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36692
// 4x2 scalar fmagic kernel: n in {4, 6} combined with k in {1, 3, 5},
// m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36709
// 4x2 scalar fmagic kernel: k in {1, 3, 5}, n in 1..2, m in 1..4 with
// cm_stride = 5; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(5);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36729
// 4x2 scalar fmagic kernel: k in {1, 3, 5} with m = 4, n = 2, ks = 3
// and a_offset = 23.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.ks(3).a_offset(23);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36745
// 4x2 scalar fmagic kernel: k in {1, 3, 5} and zero_index in 0..3 with
// m = 4, n = 2, ks = 3 and a_offset = 23.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 4; ++cur_mz) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(2).k(cur_k);
      tester.ks(3).a_offset(23).zero_index(cur_mz);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36764
// 4x2 scalar fmagic kernel: m = 4, n = 2, k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36777
// 4x2 scalar fmagic kernel: m = 4, n = 2, k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36790
// 4x2 scalar fmagic kernel: m = 4, n = 2, k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36803
// 4x2 scalar fmagic kernel: k in {1, 3, 5} with m = 4, n = 2 and
// a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36818
// 4x2 scalar fmagic kernel: k in {1, 3, 5} with m = 4, n = 2 and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36833
// 4x2 scalar fmagic kernel: k in {1, 3, 5} with m = 4, n = 2 and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36849
36850
// 4x2 scalar lrintf kernel: single configuration with m = 4, n = 2, k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36862
// 4x2 scalar lrintf kernel: m = 4, n = 2, k = 1 with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36875
// 4x2 scalar lrintf kernel: every (n, m) with n in 1..2 and m in 1..4 at
// k = 1; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(1);
      tester.iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36892
// 4x2 scalar lrintf kernel: m swept over 1..4 with n = 2 and k = 1,
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(cur_m).n(2).k(1);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36907
// 4x2 scalar lrintf kernel: n swept over 1..2 with m = 4 and k = 1,
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(cur_n).k(1);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36922
// 4x2 scalar lrintf kernel: k swept over 2..9 with m = 4 and n = 2.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36936
// 4x2 scalar lrintf kernel: every (k, n, m) with k in 2..9, n in 1..2,
// m in 1..4; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36955
// 4x2 scalar lrintf kernel: n in [3, 4) (i.e. n = 3) combined with
// k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36971
// 4x2 scalar lrintf kernel: n in [3, 4) (i.e. n = 3) combined with
// k in {1, 3, 5}, m = 4, and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(5);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36988
// 4x2 scalar lrintf kernel: n in [3, 4) (i.e. n = 3), k in {1, 3, 5},
// m in 1..4; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37007
// 4x2 scalar lrintf kernel: n in {4, 6} combined with k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
37023
// 4x2 scalar lrintf kernel: n in {4, 6} combined with k in {1, 3, 5},
// m = 4, and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(5);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
37040
// 4x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..4;
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37059
// 4x2 scalar lrintf kernel: k in {1, 3, 5} with m = 4, n = 2 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
37074
// 4x2 scalar lrintf kernel: k in {1, 3, 5}, n in 1..2, m in 1..4 with
// ks = 3; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37094
// 4x2 scalar lrintf kernel: n in [3, 4) (i.e. n = 3) combined with
// k in {1, 3, 5}, m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
37111
// 4x2 scalar lrintf kernel: n in {4, 6} combined with k in {1, 3, 5},
// m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
37128
// 4x2 scalar lrintf kernel: k in {1, 3, 5}, n in 1..2, m in 1..4 with
// cm_stride = 5; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(5);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37148
// 4x2 scalar lrintf kernel: k in {1, 3, 5} with m = 4, n = 2, ks = 3
// and a_offset = 23.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.ks(3).a_offset(23);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
37164
// 4x2 scalar lrintf kernel: k in {1, 3, 5} and zero_index in 0..3 with
// m = 4, n = 2, ks = 3 and a_offset = 23.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 4; ++cur_mz) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(2).k(cur_k);
      tester.ks(3).a_offset(23).zero_index(cur_mz);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
37183
// 4x2 scalar lrintf kernel: m = 4, n = 2, k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
37196
// 4x2 scalar lrintf kernel: m = 4, n = 2, k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
37209
// 4x2 scalar lrintf kernel: m = 4, n = 2, k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
37222
// 4x2 scalar lrintf kernel: k in {1, 3, 5} with m = 4, n = 2 and
// a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
37237
// 4x2 scalar lrintf kernel: k in {1, 3, 5} with m = 4, n = 2 and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
37252
// 4x2 scalar lrintf kernel: k in {1, 3, 5} with m = 4, n = 2 and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
37268
37269
// 4x4 scalar fmagic kernel: single configuration with m = 4, n = 4, k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
37281
// 4x4 scalar fmagic kernel: m = 4, n = 4, k = 1 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
37294
// 4x4 scalar fmagic kernel: every (n, m) with n in 1..4 and m in 1..4 at
// k = 1; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(1);
      tester.iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37311
// 4x4 scalar fmagic kernel: m swept over 1..4 with n = 4 and k = 1,
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(cur_m).n(4).k(1);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37326
// 4x4 scalar fmagic kernel: n swept over 1..4 with m = 4 and k = 1,
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(cur_n).k(1);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37341
// 4x4 scalar fmagic kernel: k swept over 2..9 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(cur_k);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37355
// 4x4 scalar fmagic kernel: every (k, n, m) with k in 2..9, n in 1..4,
// m in 1..4; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37374
// 4x4 scalar fmagic kernel: n in 5..7 combined with k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37390
// 4x4 scalar fmagic kernel: n in 5..7 combined with k in {1, 3, 5},
// m = 4, and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37407
// 4x4 scalar fmagic kernel: n in 5..7, k in {1, 3, 5}, m in 1..4;
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37426
// 4x4 scalar fmagic kernel: n in {8, 12} combined with k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37442
// 4x4 scalar fmagic kernel: n in {8, 12} combined with k in {1, 3, 5},
// m = 4, and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37459
// 4x4 scalar fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m in 1..4;
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37478
// 4x4 scalar fmagic kernel: k in {1, 3, 5} with m = 4, n = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(cur_k);
    tester.ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
37493
// 4x4 scalar fmagic kernel: k in {1, 3, 5}, n in 1..4, m in 1..4 with
// ks = 3; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37513
// 4x4 scalar fmagic kernel: n in 5..7 combined with k in {1, 3, 5},
// m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37530
// 4x4 scalar fmagic kernel: n in {8, 12} combined with k in {1, 3, 5},
// m = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
37547
// 4x4 scalar fmagic kernel: k in {1, 3, 5}, n in 1..4, m in 1..4 with
// cm_stride = 7; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37567
// a_offset: runs the 4x4 scalar fmagic microkernel with a_offset(23) and ks(3)
// on a full 4x4 tile for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .ks(3)
      .a_offset(23)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37583
// zero: runs the 4x4 scalar fmagic microkernel with a_offset(23) and ks(3),
// sweeping zero_index over 0..3 for each k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37602
// qmin: runs the 4x4 scalar fmagic microkernel on a full 4x4 tile with k = 1
// and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37615
// qmax: runs the 4x4 scalar fmagic microkernel on a full 4x4 tile with k = 1
// and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37628
// strided_cm: runs the 4x4 scalar fmagic microkernel on a full 4x4 tile with
// k = 1 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37641
// no_a_zero_point: runs the 4x4 scalar fmagic microkernel on a full 4x4 tile
// with a_zero_point(0) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37656
// no_b_zero_point: runs the 4x4 scalar fmagic microkernel on a full 4x4 tile
// with b_zero_point(0) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37671
// no_zero_point: runs the 4x4 scalar fmagic microkernel on a full 4x4 tile
// with both a_zero_point(0) and b_zero_point(0) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37687
37688
// k_eq_1: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
37700
// strided_cn: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile with
// k = 1 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
37713
// k_eq_1_subtile: runs the 4x4 scalar lrintf microkernel with k = 1 for every
// m = 1..4 and n = 1..4, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37730
// k_eq_1_subtile_m: runs the 4x4 scalar lrintf microkernel with k = 1 and
// n = 4 for m = 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37745
// k_eq_1_subtile_n: runs the 4x4 scalar lrintf microkernel with k = 1 and
// m = 4 for n = 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37760
// k_gt_1: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile for
// k = 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37774
// k_gt_1_subtile: runs the 4x4 scalar lrintf microkernel for every k = 2..9,
// n = 1..4 and m = 1..4, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37793
// n_gt_4: runs the 4x4 scalar lrintf microkernel with m = 4 for n = 5..7 and
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37809
// n_gt_4_strided_cn: runs the 4x4 scalar lrintf microkernel with cn_stride(7)
// and m = 4 for n = 5..7 and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37826
// n_gt_4_subtile: runs the 4x4 scalar lrintf microkernel for every n = 5..7,
// k = 1, 3, 5 and m = 1..4, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37845
// n_div_4: runs the 4x4 scalar lrintf microkernel with m = 4 for n = 8 and 12
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37861
// n_div_4_strided_cn: runs the 4x4 scalar lrintf microkernel with cn_stride(7)
// and m = 4 for n = 8 and 12 and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37878
// n_div_4_subtile: runs the 4x4 scalar lrintf microkernel for every n = 8 and
// 12, k = 1, 3, 5 and m = 1..4, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37897
// small_kernel: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile with
// ks(3) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37912
// small_kernel_subtile: runs the 4x4 scalar lrintf microkernel with ks(3) for
// every k = 1, 3, 5, n = 1..4 and m = 1..4, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37932
// n_gt_4_small_kernel: runs the 4x4 scalar lrintf microkernel with ks(3),
// m = 4, n = 5..7 and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37949
// n_div_4_small_kernel: runs the 4x4 scalar lrintf microkernel with ks(3),
// m = 4, n = 8 and 12, and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37966
// strided_cm_subtile: runs the 4x4 scalar lrintf microkernel with cm_stride(7)
// for every m = 1..4, n = 1..4 and k = 1, 3, 5, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37986
// a_offset: runs the 4x4 scalar lrintf microkernel with a_offset(23) and ks(3)
// on a full 4x4 tile for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .ks(3)
      .a_offset(23)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
38002
// zero: runs the 4x4 scalar lrintf microkernel with a_offset(23) and ks(3),
// sweeping zero_index over 0..3 for each k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38021
// qmin: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile with k = 1
// and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
38034
// qmax: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile with k = 1
// and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
38047
// strided_cm: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile with
// k = 1 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
38060
// no_a_zero_point: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile
// with a_zero_point(0) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
38075
// no_b_zero_point: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile
// with b_zero_point(0) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
38090
// no_zero_point: runs the 4x4 scalar lrintf microkernel on a full 4x4 tile
// with both a_zero_point(0) and b_zero_point(0) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
38106