// xref: /aosp_15_r20/external/XNNPACK/test/qu8-igemm-minmax-fp32.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-igemm-minmax-fp32.yaml
//   Generator: tools/generate-gemm-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20 
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25 
26 
27 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_16: one tester run with m=4, n=16, k=16 (m and n equal the values
// passed to mr()/nr()).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
40 
// strided_cn: same m=4, n=16, k=16 configuration as k_eq_16, but with
// cn_stride set to 19 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
54 
// k_eq_16_subtile: k fixed at 16; sweeps every (m, n) with 1 <= m <= 4 and
// 1 <= n <= 16, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
72 
// k_eq_16_subtile_m: k=16 and n=16 fixed; sweeps m from 1 to 4, one tester
// iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(16)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
88 
// k_eq_16_subtile_n: k=16 and m=4 fixed; sweeps n from 1 to 16, one tester
// iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
104 
// k_lt_16: m=4, n=16 fixed; sweeps k over 1..15 (every value below 16).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
119 
// k_lt_16_subtile: sweeps k over 1..15 and, for each k, every (m, n) with
// 1 <= m <= 4 and 1 <= n <= 16; one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
139 
// k_gt_16: m=4, n=16 fixed; sweeps k over 17..31.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
154 
// k_gt_16_subtile: sweeps k over 17..31 and, for each k, every (m, n) with
// 1 <= m <= 4 and 1 <= n <= 16; one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
174 
// k_div_16: m=4, n=16 fixed; sweeps k over the multiples of 16 from 32 to 160.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
189 
// k_div_16_subtile: sweeps k over the multiples of 16 from 32 to 160 and, for
// each k, every (m, n) with 1 <= m <= 4 and 1 <= n <= 16; one tester
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
209 
// n_gt_16: m=4 fixed; sweeps n over 17..31 and k over 1, 18, 35, 52, 69
// (start 1, step 17, limit 80).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
226 
// n_gt_16_strided_cn: same n (17..31) and k (1..80 step 17) sweep as n_gt_16,
// with cn_stride set to 19 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
244 
// n_gt_16_subtile: sweeps n over 17..31, k over 1..80 in steps of 17, and m
// over 1..4; one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
264 
// n_div_16: m=4 fixed; n takes the values 32 and 48, k sweeps 1..80 in steps
// of 17.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
281 
// n_div_16_strided_cn: same n (32, 48) and k (1..80 step 17) sweep as
// n_div_16, with cn_stride set to 19 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
299 
// n_div_16_subtile: n takes 32 and 48, k sweeps 1..80 in steps of 17, and m
// sweeps 1..4; one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
319 
// small_kernel: m=4, n=16 fixed with ks set to 3; k sweeps 1..80 in steps
// of 17.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
335 
// small_kernel_subtile: ks set to 3; sweeps k over 1..80 in steps of 17 and
// every (m, n) with 1 <= m <= 4 and 1 <= n <= 16; one tester iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
356 
// n_gt_16_small_kernel: m=4 fixed with ks set to 3; sweeps n over 17..31 and
// k over 1..80 in steps of 17.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
374 
// n_div_16_small_kernel: m=4 fixed with ks set to 3; n takes 32 and 48, k
// sweeps 1..80 in steps of 17.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
392 
// strided_cm_subtile: cm_stride set to 19; sweeps k over 1..80 in steps of 17
// and every (m, n) with 1 <= m <= 4 and 1 <= n <= 16; one tester iteration
// per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
413 
// a_offset: m=4, n=16 fixed with ks set to 3 and a_offset set to 331; k
// sweeps 1..80 in steps of 17.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
430 
// zero: same ks=3 / a_offset=331 configuration as a_offset, additionally
// sweeping zero_index over 0..3 for each k in 1..80 (step 17).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
450 
// qmin: one tester run with m=4, n=16, k=16 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
464 
// qmax: one tester run with m=4, n=16, k=16 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
478 
// strided_cm: one tester run with m=4, n=16, k=16 and cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
492 
// no_a_zero_point: m=4, n=16 fixed with a_zero_point set to 0; k sweeps
// 1..80 in steps of 17.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
508 
// no_b_zero_point: m=4, n=16 fixed with b_zero_point set to 0; k sweeps
// 1..80 in steps of 17.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
524 
// no_zero_point: m=4, n=16 fixed with both a_zero_point and b_zero_point set
// to 0; k sweeps 1..80 in steps of 17.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
541 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
542 
543 
544 #if XNN_ARCH_ARM
// k_eq_4: one tester run with m=1, n=1, k=4 (m and n equal the values passed
// to mr()/nr()).
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(1)
    .n(1)
    .k(4)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
557 
// strided_cn: same m=1, n=1, k=4 configuration as k_eq_4, with cn_stride set
// to 3 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(1)
    .n(1)
    .k(4)
    .cn_stride(3)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
571 
// k_eq_4_subtile: k fixed at 4. Both loops have the single-value range 1..1,
// so the tester runs exactly once with m=1, n=1 and one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 1; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
589 
// k_eq_4_subtile_m: k=4, n=1 fixed. The m loop covers only m=1, so the tester
// runs once with one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(m)
      .n(1)
      .k(4)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
605 
// k_eq_4_subtile_n: k=4, m=1 fixed. The n loop covers only n=1, so the tester
// runs once with one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 1; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
621 
// k_lt_4: m=1, n=1 fixed; sweeps k over 1..3 (every value below 4).
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(1)
      .n(1)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
636 
// k_lt_4_subtile: sweeps k over 1..3. The m and n loops each cover only the
// value 1, so there is one single-iteration tester run per k.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
656 
// k_gt_4: m=1, n=1 fixed; sweeps k over 5..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(1)
      .n(1)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
671 
// k_gt_4_subtile: sweeps k over 5..7. The m and n loops each cover only the
// value 1, so there is one single-iteration tester run per k.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
691 
// k_div_4: m=1, n=1 fixed; sweeps k over the multiples of 4 from 8 to 40.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(1)
      .n(1)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
706 
// k_div_4_subtile: sweeps k over the multiples of 4 from 8 to 40. The m and n
// loops each cover only the value 1, so there is one single-iteration tester
// run per k.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
726 
// n_gt_1: intended sweep over n and k (k = 1, 6, 11, 16) with m=1.
// NOTE(review): the outer loop starts at n = 2 with the condition n < 2, so
// its range is empty and the tester below is never invoked; the test passes
// vacuously. Left unchanged because this file is generated; any change to the
// bounds belongs in the generator.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
743 
// n_gt_1_strided_cn: intended sweep over n and k (k = 1, 6, 11, 16) with m=1
// and cn_stride set to 3.
// NOTE(review): the outer loop starts at n = 2 with the condition n < 2, so
// its range is empty and the tester below is never invoked; the test passes
// vacuously. Left unchanged because this file is generated.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
761 
// n_gt_1_subtile: intended sweep over n, k (k = 1, 6, 11, 16) and m (only
// m=1), one tester iteration per combination.
// NOTE(review): the outer loop starts at n = 2 with the condition n < 2, so
// its range is empty and the tester below is never invoked; the test passes
// vacuously. Left unchanged because this file is generated.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
781 
// n_div_1: m=1 fixed; n takes the values 2 and 3, k takes 1, 6, 11, 16
// (start 1, step 5, limit 20).
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
798 
// n_div_1_strided_cn: same n (2, 3) and k (1, 6, 11, 16) sweep as n_div_1,
// with cn_stride set to 3 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
816 
// n_div_1_subtile: n takes 2 and 3, k takes 1, 6, 11, 16; the m loop covers
// only m=1. One tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
836 
// small_kernel: m=1, n=1 fixed with ks set to 3; k takes 1, 6, 11, 16.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(1)
      .n(1)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
852 
// small_kernel_subtile: ks set to 3; k takes 1, 6, 11, 16. The m and n loops
// each cover only the value 1, so there is one single-iteration tester run
// per k.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
873 
// Runs the kernel with N = 2 (greater than nr = 1) and ks = 3, sweeping K over {1, 6, 11, 16}.
//
// The previous bound (n < 2, starting at n = 2) made the loop body unreachable, so the test
// passed without ever invoking the kernel. The inclusive bound exercises N = 2.
// NOTE(review): this file is marked auto-generated; the generator template presumably needs
// the same fix for nr = 1 kernels - confirm before regenerating.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
891 
// ks = 3 with N over {2, 3} and K over {1, 6, 11, 16}; M fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 2; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
909 
// cm_stride = 3 with K over {1, 6, 11, 16}, N over {1} and M over {1}; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
930 
// Full 1x1 tile with ks = 3 and a_offset = 23, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(depth)
      .ks(3)
      .a_offset(23)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
947 
// ks = 3, a_offset = 23; sweeps K over {1, 6, 11, 16} and zero_index over {0}.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(depth)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
967 
// Single 1x1 tile, K = 4, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(1).kr(4).sr(1)
    .m(1).n(1).k(4)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
981 
// Single 1x1 tile, K = 4, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(1).kr(4).sr(1)
    .m(1).n(1).k(4)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
995 
// Single 1x1 tile, K = 4, with cm_stride set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1).nr(1).kr(4).sr(1)
    .m(1).n(1).k(4)
    .cm_stride(3)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1009 
// Full 1x1 tile with a_zero_point = 0, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1025 
// Full 1x1 tile with b_zero_point = 0, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1041 
// Full 1x1 tile with both a_zero_point and b_zero_point set to 0, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1058 #endif  // XNN_ARCH_ARM
1059 
1060 
1061 #if XNN_ARCH_ARM
// Full 2x1 tile with K = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(1).kr(4).sr(1)
    .m(2).n(1).k(4)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1074 
// Full 2x1 tile with K = 4 and cn_stride = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(1).kr(4).sr(1)
    .m(2).n(1).k(4)
    .cn_stride(3)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1088 
// K = 4 with N over {1} and M over {1, 2}; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 1; cols <= 1; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
1106 
// K = 4, N = 1, with M over {1, 2}; one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(rows).n(1).k(4)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1122 
// K = 4, M = 2, with N over {1}; one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 1; cols <= 1; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(cols).k(4)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1138 
// Full 2x1 tile, sweeping K over {1, 2, 3}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1153 
// K over {1, 2, 3}, N over {1} and M over {1, 2}; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1173 
// Full 2x1 tile, sweeping K over {5, 6, 7}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1188 
// K over {5, 6, 7}, N over {1} and M over {1, 2}; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1208 
// Full 2x1 tile, sweeping K over multiples of 4 from 8 through 40.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1223 
// K over multiples of 4 from 8 through 40, N over {1}, M over {1, 2}; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1243 
// Runs the kernel with N = 2 (greater than nr = 1) and M = 2, sweeping K over {1, 6, 11, 16}.
//
// The previous bound (n < 2, starting at n = 2) made the loop body unreachable, so the test
// passed without ever invoking the kernel. The inclusive bound exercises N = 2.
// NOTE(review): this file is marked auto-generated; the generator template presumably needs
// the same fix for nr = 1 kernels - confirm before regenerating.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1260 
// Runs the kernel with N = 2 (greater than nr = 1), M = 2 and cn_stride = 3, sweeping K over
// {1, 6, 11, 16}.
//
// The previous bound (n < 2, starting at n = 2) made the loop body unreachable, so the test
// passed without ever invoking the kernel. The inclusive bound exercises N = 2.
// NOTE(review): this file is marked auto-generated; the generator template presumably needs
// the same fix for nr = 1 kernels - confirm before regenerating.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1278 
// Runs the kernel with N = 2 (greater than nr = 1), K over {1, 6, 11, 16} and M over {1, 2};
// one iteration per combination.
//
// The previous bound (n < 2, starting at n = 2) made the loop body unreachable, so the test
// passed without ever invoking the kernel. The inclusive bound exercises N = 2.
// NOTE(review): this file is marked auto-generated; the generator template presumably needs
// the same fix for nr = 1 kernels - confirm before regenerating.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1298 
// M = 2 with N over {2, 3} and K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 2; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
1315 
// M = 2 and cn_stride = 3 with N over {2, 3} and K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 2; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
1333 
// N over {2, 3}, K over {1, 6, 11, 16} and M over {1, 2}; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 2; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1353 
// Full 2x1 tile with ks = 3, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(depth)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1369 
// ks = 3 with K over {1, 6, 11, 16}, N over {1} and M over {1, 2}; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1390 
// Runs the kernel with N = 2 (greater than nr = 1), M = 2 and ks = 3, sweeping K over
// {1, 6, 11, 16}.
//
// The previous bound (n < 2, starting at n = 2) made the loop body unreachable, so the test
// passed without ever invoking the kernel. The inclusive bound exercises N = 2.
// NOTE(review): this file is marked auto-generated; the generator template presumably needs
// the same fix for nr = 1 kernels - confirm before regenerating.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1408 
// M = 2 and ks = 3 with N over {2, 3} and K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 2; cols <= 3; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
1426 
// cm_stride = 3 with K over {1, 6, 11, 16}, N over {1} and M over {1, 2}; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 1; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1447 
// Full 2x1 tile with ks = 3 and a_offset = 43, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1464 
// ks = 3, a_offset = 43; sweeps K over {1, 6, 11, 16} and zero_index over {0, 1}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
          xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
1484 
// Full 2x1 tile, K = 4, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(1).kr(4).sr(1)
    .m(2).n(1).k(4)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1498 
// Full 2x1 tile, K = 4, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(1).kr(4).sr(1)
    .m(2).n(1).k(4)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1512 
// Full 2x1 tile, K = 4, with cm_stride set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2).nr(1).kr(4).sr(1)
    .m(2).n(1).k(4)
    .cm_stride(3)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
      xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
      xnn_qu8_requantize_fp32);
}
1526 
// Full 2x1 tile with a_zero_point = 0, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1542 
// Full 2x1 tile with b_zero_point = 0, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1558 
// Full 2x1 tile with both a_zero_point and b_zero_point set to 0, sweeping K over {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
        xnn_init_qu8_conv_minmax_fp32_armsimd32_params,
        xnn_qu8_requantize_fp32);
  }
}
1575 #endif  // XNN_ARCH_ARM
1576 
1577 
1578 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 1x16 tile with K = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
1591 
// Full 1x16 tile with K = 8 and cn_stride = 19.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
1605 
// K = 8 with N over 1..16 and M over {1}; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
1623 
// K = 8, N = 16, with M over {1}; one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(rows).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1639 
// K = 8, M = 1, with N over 1..16; one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1655 
// Full 1x16 tile, sweeping K over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1670 
// K over 1..7, N over 1..16 and M over {1}; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1690 
// Full 1x16 tile, sweeping K over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1705 
// K over 9..15, N over 1..16 and M over {1}; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1725 
// Full 1x16 tile, sweeping K over multiples of 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1740 
// K over multiples of 8 from 16 through 80, N over 1..16, M over {1}; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
1760 
// Runs the 1x16 neonv8_mlal_lane microkernel with n in 17..31 (more than
// nr = 16 but less than 2 * nr) and k in {1, 10, 19, 28, 37}, with m = 1.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,n_gt_16)1761   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16) {
1762     TEST_REQUIRES_ARM_NEON_V8;
1763     for (uint32_t n = 17; n < 32; n++) {
1764       for (size_t k = 1; k <= 40; k += 9) {
1765         GemmMicrokernelTester()
1766           .mr(1)
1767           .nr(16)
1768           .kr(1)
1769           .sr(1)
1770           .m(1)
1771           .n(n)
1772           .k(k)
1773           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1774       }
1775     }
1776   }
1777 
// Same n (17..31) and k ({1, 10, 19, 28, 37}) sweep as n_gt_16, with
// cn_stride(19) set on the tester. 19 differs from nr = 16; presumably this
// exercises a non-default stride between output column tiles -- confirm
// against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,n_gt_16_strided_cn)1778   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
1779     TEST_REQUIRES_ARM_NEON_V8;
1780     for (uint32_t n = 17; n < 32; n++) {
1781       for (size_t k = 1; k <= 40; k += 9) {
1782         GemmMicrokernelTester()
1783           .mr(1)
1784           .nr(16)
1785           .kr(1)
1786           .sr(1)
1787           .m(1)
1788           .n(n)
1789           .k(k)
1790           .cn_stride(19)
1791           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1792       }
1793     }
1794   }
1795 
// Sweeps n over 17..31, k over {1, 10, 19, 28, 37} and m over the single
// value 1, running the 1x16 neonv8_mlal_lane microkernel with iterations(1)
// for each combination.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,n_gt_16_subtile)1796   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
1797     TEST_REQUIRES_ARM_NEON_V8;
1798     for (uint32_t n = 17; n < 32; n++) {
1799       for (size_t k = 1; k <= 40; k += 9) {
1800         for (uint32_t m = 1; m <= 1; m++) {
1801           GemmMicrokernelTester()
1802             .mr(1)
1803             .nr(16)
1804             .kr(1)
1805             .sr(1)
1806             .m(m)
1807             .n(n)
1808             .k(k)
1809             .iterations(1)
1810             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1811         }
1812       }
1813     }
1814   }
1815 
// Runs the 1x16 neonv8_mlal_lane microkernel with n in {32, 48} (multiples
// of nr = 16) and k in {1, 10, 19, 28, 37}, with m = 1.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,n_div_16)1816   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16) {
1817     TEST_REQUIRES_ARM_NEON_V8;
1818     for (uint32_t n = 32; n <= 48; n += 16) {
1819       for (size_t k = 1; k <= 40; k += 9) {
1820         GemmMicrokernelTester()
1821           .mr(1)
1822           .nr(16)
1823           .kr(1)
1824           .sr(1)
1825           .m(1)
1826           .n(n)
1827           .k(k)
1828           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1829       }
1830     }
1831   }
1832 
// Same n ({32, 48}) and k ({1, 10, 19, 28, 37}) sweep as n_div_16, with
// cn_stride(19) set on the tester. 19 differs from nr = 16; presumably this
// exercises a non-default stride between output column tiles -- confirm
// against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,n_div_16_strided_cn)1833   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
1834     TEST_REQUIRES_ARM_NEON_V8;
1835     for (uint32_t n = 32; n <= 48; n += 16) {
1836       for (size_t k = 1; k <= 40; k += 9) {
1837         GemmMicrokernelTester()
1838           .mr(1)
1839           .nr(16)
1840           .kr(1)
1841           .sr(1)
1842           .m(1)
1843           .n(n)
1844           .k(k)
1845           .cn_stride(19)
1846           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1847       }
1848     }
1849   }
1850 
// Sweeps n over {32, 48}, k over {1, 10, 19, 28, 37} and m over the single
// value 1, running the 1x16 neonv8_mlal_lane microkernel with iterations(1)
// for each combination.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,n_div_16_subtile)1851   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
1852     TEST_REQUIRES_ARM_NEON_V8;
1853     for (uint32_t n = 32; n <= 48; n += 16) {
1854       for (size_t k = 1; k <= 40; k += 9) {
1855         for (uint32_t m = 1; m <= 1; m++) {
1856           GemmMicrokernelTester()
1857             .mr(1)
1858             .nr(16)
1859             .kr(1)
1860             .sr(1)
1861             .m(m)
1862             .n(n)
1863             .k(k)
1864             .iterations(1)
1865             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1866         }
1867       }
1868     }
1869   }
1870 
// Runs the 1x16 neonv8_mlal_lane microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with ks(3) set on the tester. Presumably ks is the
// indirection kernel size (per the test name) -- confirm against
// GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,small_kernel)1871   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel) {
1872     TEST_REQUIRES_ARM_NEON_V8;
1873     for (size_t k = 1; k <= 40; k += 9) {
1874       GemmMicrokernelTester()
1875         .mr(1)
1876         .nr(16)
1877         .kr(1)
1878         .sr(1)
1879         .m(1)
1880         .n(16)
1881         .k(k)
1882         .ks(3)
1883         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1884     }
1885   }
1886 
// Like small_kernel (ks(3), k in {1, 10, 19, 28, 37}), but additionally
// sweeps n over 1..16 and m over the single value 1, with iterations(1) per
// configuration. Presumably ks is the indirection kernel size -- confirm
// against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,small_kernel_subtile)1887   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
1888     TEST_REQUIRES_ARM_NEON_V8;
1889     for (size_t k = 1; k <= 40; k += 9) {
1890       for (uint32_t n = 1; n <= 16; n++) {
1891         for (uint32_t m = 1; m <= 1; m++) {
1892           GemmMicrokernelTester()
1893             .mr(1)
1894             .nr(16)
1895             .kr(1)
1896             .sr(1)
1897             .m(m)
1898             .n(n)
1899             .k(k)
1900             .ks(3)
1901             .iterations(1)
1902             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1903         }
1904       }
1905     }
1906   }
1907 
// Combines the n_gt_16 sweep (n in 17..31, k in {1, 10, 19, 28, 37}, m = 1)
// with ks(3) on the tester. Presumably ks is the indirection kernel size --
// confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,n_gt_16_small_kernel)1908   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
1909     TEST_REQUIRES_ARM_NEON_V8;
1910     for (uint32_t n = 17; n < 32; n++) {
1911       for (size_t k = 1; k <= 40; k += 9) {
1912         GemmMicrokernelTester()
1913           .mr(1)
1914           .nr(16)
1915           .kr(1)
1916           .sr(1)
1917           .m(1)
1918           .n(n)
1919           .k(k)
1920           .ks(3)
1921           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1922       }
1923     }
1924   }
1925 
// Combines the n_div_16 sweep (n in {32, 48}, k in {1, 10, 19, 28, 37},
// m = 1) with ks(3) on the tester. Presumably ks is the indirection kernel
// size -- confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,n_div_16_small_kernel)1926   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
1927     TEST_REQUIRES_ARM_NEON_V8;
1928     for (uint32_t n = 32; n <= 48; n += 16) {
1929       for (size_t k = 1; k <= 40; k += 9) {
1930         GemmMicrokernelTester()
1931           .mr(1)
1932           .nr(16)
1933           .kr(1)
1934           .sr(1)
1935           .m(1)
1936           .n(n)
1937           .k(k)
1938           .ks(3)
1939           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1940       }
1941     }
1942   }
1943 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..16 and m over the single
// value 1 with cm_stride(19) set on the tester and iterations(1) per
// configuration. 19 exceeds the largest n (16); presumably cm_stride is the
// output row stride -- confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,strided_cm_subtile)1944   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
1945     TEST_REQUIRES_ARM_NEON_V8;
1946     for (size_t k = 1; k <= 40; k += 9) {
1947       for (uint32_t n = 1; n <= 16; n++) {
1948         for (uint32_t m = 1; m <= 1; m++) {
1949           GemmMicrokernelTester()
1950             .mr(1)
1951             .nr(16)
1952             .kr(1)
1953             .sr(1)
1954             .m(m)
1955             .n(n)
1956             .k(k)
1957             .cm_stride(19)
1958             .iterations(1)
1959             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1960         }
1961       }
1962     }
1963   }
1964 
// Runs the 1x16 neonv8_mlal_lane microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with ks(3) and a_offset(43) set on the tester.
// NOTE(review): the meaning of a_offset (presumably an offset applied to the
// input pointers) is defined by GemmMicrokernelTester, not visible here.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,a_offset)1965   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, a_offset) {
1966     TEST_REQUIRES_ARM_NEON_V8;
1967     for (size_t k = 1; k <= 40; k += 9) {
1968       GemmMicrokernelTester()
1969         .mr(1)
1970         .nr(16)
1971         .kr(1)
1972         .sr(1)
1973         .m(1)
1974         .n(16)
1975         .k(k)
1976         .ks(3)
1977         .a_offset(43)
1978         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1979     }
1980   }
1981 
// Same configuration as a_offset (ks(3), a_offset(43), k in
// {1, 10, 19, 28, 37}), plus zero_index(mz) for mz in 0..0 -- the mz loop
// bound equals mr = 1, so only index 0 is tried.
// NOTE(review): presumably zero_index selects which row uses the zero
// buffer -- confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,zero)1982   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, zero) {
1983     TEST_REQUIRES_ARM_NEON_V8;
1984     for (size_t k = 1; k <= 40; k += 9) {
1985       for (uint32_t mz = 0; mz < 1; mz++) {
1986         GemmMicrokernelTester()
1987           .mr(1)
1988           .nr(16)
1989           .kr(1)
1990           .sr(1)
1991           .m(1)
1992           .n(16)
1993           .k(k)
1994           .ks(3)
1995           .a_offset(43)
1996           .zero_index(mz)
1997           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
1998       }
1999     }
2000   }
2001 
// Runs the 1x16 neonv8_mlal_lane microkernel once on a full 1x16 tile with
// k = 8 and qmin(128) set on the tester (presumably the lower output clamp
// bound -- confirm against GemmMicrokernelTester).
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,qmin)2002   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmin) {
2003     TEST_REQUIRES_ARM_NEON_V8;
2004     GemmMicrokernelTester()
2005       .mr(1)
2006       .nr(16)
2007       .kr(1)
2008       .sr(1)
2009       .m(1)
2010       .n(16)
2011       .k(8)
2012       .qmin(128)
2013       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2014   }
2015 
// Runs the 1x16 neonv8_mlal_lane microkernel once on a full 1x16 tile with
// k = 8 and qmax(128) set on the tester (presumably the upper output clamp
// bound -- confirm against GemmMicrokernelTester).
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,qmax)2016   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmax) {
2017     TEST_REQUIRES_ARM_NEON_V8;
2018     GemmMicrokernelTester()
2019       .mr(1)
2020       .nr(16)
2021       .kr(1)
2022       .sr(1)
2023       .m(1)
2024       .n(16)
2025       .k(8)
2026       .qmax(128)
2027       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2028   }
2029 
// Runs the 1x16 neonv8_mlal_lane microkernel once on a full 1x16 tile with
// k = 8 and cm_stride(19) set on the tester. 19 exceeds n = 16; presumably
// cm_stride is the output row stride -- confirm against
// GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,strided_cm)2030   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm) {
2031     TEST_REQUIRES_ARM_NEON_V8;
2032     GemmMicrokernelTester()
2033       .mr(1)
2034       .nr(16)
2035       .kr(1)
2036       .sr(1)
2037       .m(1)
2038       .n(16)
2039       .k(8)
2040       .cm_stride(19)
2041       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2042   }
2043 
// Runs the 1x16 neonv8_mlal_lane microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with a_zero_point(0) set on the tester; the B zero
// point is left at the tester's default.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,no_a_zero_point)2044   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_a_zero_point) {
2045     TEST_REQUIRES_ARM_NEON_V8;
2046     for (size_t k = 1; k <= 40; k += 9) {
2047       GemmMicrokernelTester()
2048         .mr(1)
2049         .nr(16)
2050         .kr(1)
2051         .sr(1)
2052         .m(1)
2053         .n(16)
2054         .k(k)
2055         .a_zero_point(0)
2056         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2057     }
2058   }
2059 
// Runs the 1x16 neonv8_mlal_lane microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with b_zero_point(0) set on the tester; the A zero
// point is left at the tester's default.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,no_b_zero_point)2060   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_b_zero_point) {
2061     TEST_REQUIRES_ARM_NEON_V8;
2062     for (size_t k = 1; k <= 40; k += 9) {
2063       GemmMicrokernelTester()
2064         .mr(1)
2065         .nr(16)
2066         .kr(1)
2067         .sr(1)
2068         .m(1)
2069         .n(16)
2070         .k(k)
2071         .b_zero_point(0)
2072         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2073     }
2074   }
2075 
// Runs the 1x16 neonv8_mlal_lane microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with both a_zero_point(0) and b_zero_point(0) set on
// the tester.
// Gated by TEST_REQUIRES_ARM_NEON_V8 (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE,no_zero_point)2076   TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_zero_point) {
2077     TEST_REQUIRES_ARM_NEON_V8;
2078     for (size_t k = 1; k <= 40; k += 9) {
2079       GemmMicrokernelTester()
2080         .mr(1)
2081         .nr(16)
2082         .kr(1)
2083         .sr(1)
2084         .m(1)
2085         .n(16)
2086         .k(k)
2087         .a_zero_point(0)
2088         .b_zero_point(0)
2089         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2090     }
2091   }
2092 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2093 
2094 
2095 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Runs xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot once through
// GemmMicrokernelTester on a full 1x16 tile (mr = 1, nr = 16, kr = 4) with
// k = 8.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_eq_8)2096   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8) {
2097     TEST_REQUIRES_ARM_NEON_DOT;
2098     GemmMicrokernelTester()
2099       .mr(1)
2100       .nr(16)
2101       .kr(4)
2102       .sr(1)
2103       .m(1)
2104       .n(16)
2105       .k(8)
2106       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2107   }
2108 
// Runs the 1x16c4 neondot microkernel once on a full 1x16 tile with k = 8
// and cn_stride(19) set on the tester. 19 differs from nr = 16; presumably
// this exercises a non-default stride between output column tiles --
// confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,strided_cn)2109   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cn) {
2110     TEST_REQUIRES_ARM_NEON_DOT;
2111     GemmMicrokernelTester()
2112       .mr(1)
2113       .nr(16)
2114       .kr(4)
2115       .sr(1)
2116       .m(1)
2117       .n(16)
2118       .k(8)
2119       .cn_stride(19)
2120       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2121   }
2122 
// With k fixed at 8, sweeps n over 1..16 and m over the single value 1,
// running the 1x16c4 neondot microkernel with iterations(1) for each
// combination.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_eq_8_subtile)2123   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile) {
2124     TEST_REQUIRES_ARM_NEON_DOT;
2125     for (uint32_t n = 1; n <= 16; n++) {
2126       for (uint32_t m = 1; m <= 1; m++) {
2127         GemmMicrokernelTester()
2128           .mr(1)
2129           .nr(16)
2130           .kr(4)
2131           .sr(1)
2132           .m(m)
2133           .n(n)
2134           .k(8)
2135           .iterations(1)
2136           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2137       }
2138     }
2139   }
2140 
// With k = 8 and n = 16 fixed, sweeps m over the single value 1 and runs
// the 1x16c4 neondot microkernel with iterations(1).
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_eq_8_subtile_m)2141   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_m) {
2142     TEST_REQUIRES_ARM_NEON_DOT;
2143     for (uint32_t m = 1; m <= 1; m++) {
2144       GemmMicrokernelTester()
2145         .mr(1)
2146         .nr(16)
2147         .kr(4)
2148         .sr(1)
2149         .m(m)
2150         .n(16)
2151         .k(8)
2152         .iterations(1)
2153         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2154     }
2155   }
2156 
// With k = 8 and m = 1 fixed, sweeps n over 1..16 and runs the 1x16c4
// neondot microkernel with iterations(1) for each n.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_eq_8_subtile_n)2157   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_n) {
2158     TEST_REQUIRES_ARM_NEON_DOT;
2159     for (uint32_t n = 1; n <= 16; n++) {
2160       GemmMicrokernelTester()
2161         .mr(1)
2162         .nr(16)
2163         .kr(4)
2164         .sr(1)
2165         .m(1)
2166         .n(n)
2167         .k(8)
2168         .iterations(1)
2169         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2170     }
2171   }
2172 
// Runs the 1x16c4 neondot microkernel on a full 1x16 tile for every k in
// 1..7 (below 8).
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_lt_8)2173   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8) {
2174     TEST_REQUIRES_ARM_NEON_DOT;
2175     for (size_t k = 1; k < 8; k++) {
2176       GemmMicrokernelTester()
2177         .mr(1)
2178         .nr(16)
2179         .kr(4)
2180         .sr(1)
2181         .m(1)
2182         .n(16)
2183         .k(k)
2184         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2185     }
2186   }
2187 
// Sweeps k over 1..7, n over 1..16 and m over the single value 1, running
// the 1x16c4 neondot microkernel with iterations(1) for each combination.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_lt_8_subtile)2188   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_subtile) {
2189     TEST_REQUIRES_ARM_NEON_DOT;
2190     for (size_t k = 1; k < 8; k++) {
2191       for (uint32_t n = 1; n <= 16; n++) {
2192         for (uint32_t m = 1; m <= 1; m++) {
2193           GemmMicrokernelTester()
2194             .mr(1)
2195             .nr(16)
2196             .kr(4)
2197             .sr(1)
2198             .m(m)
2199             .n(n)
2200             .k(k)
2201             .iterations(1)
2202             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2203         }
2204       }
2205     }
2206   }
2207 
// Runs the 1x16c4 neondot microkernel on a full 1x16 tile for every k in
// 9..15 (above 8, below 16).
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_gt_8)2208   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8) {
2209     TEST_REQUIRES_ARM_NEON_DOT;
2210     for (size_t k = 9; k < 16; k++) {
2211       GemmMicrokernelTester()
2212         .mr(1)
2213         .nr(16)
2214         .kr(4)
2215         .sr(1)
2216         .m(1)
2217         .n(16)
2218         .k(k)
2219         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2220     }
2221   }
2222 
// Sweeps k over 9..15, n over 1..16 and m over the single value 1, running
// the 1x16c4 neondot microkernel with iterations(1) for each combination.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_gt_8_subtile)2223   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_subtile) {
2224     TEST_REQUIRES_ARM_NEON_DOT;
2225     for (size_t k = 9; k < 16; k++) {
2226       for (uint32_t n = 1; n <= 16; n++) {
2227         for (uint32_t m = 1; m <= 1; m++) {
2228           GemmMicrokernelTester()
2229             .mr(1)
2230             .nr(16)
2231             .kr(4)
2232             .sr(1)
2233             .m(m)
2234             .n(n)
2235             .k(k)
2236             .iterations(1)
2237             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2238         }
2239       }
2240     }
2241   }
2242 
// Runs the 1x16c4 neondot microkernel on a full 1x16 tile for every k that
// is a multiple of 8 from 16 through 80.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_div_8)2243   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8) {
2244     TEST_REQUIRES_ARM_NEON_DOT;
2245     for (size_t k = 16; k <= 80; k += 8) {
2246       GemmMicrokernelTester()
2247         .mr(1)
2248         .nr(16)
2249         .kr(4)
2250         .sr(1)
2251         .m(1)
2252         .n(16)
2253         .k(k)
2254         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2255     }
2256   }
2257 
// Sweeps k over 16, 24, ..., 80, n over 1..16 and m over the single value 1,
// running the 1x16c4 neondot microkernel with iterations(1) for each
// combination.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,k_div_8_subtile)2258   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_subtile) {
2259     TEST_REQUIRES_ARM_NEON_DOT;
2260     for (size_t k = 16; k <= 80; k += 8) {
2261       for (uint32_t n = 1; n <= 16; n++) {
2262         for (uint32_t m = 1; m <= 1; m++) {
2263           GemmMicrokernelTester()
2264             .mr(1)
2265             .nr(16)
2266             .kr(4)
2267             .sr(1)
2268             .m(m)
2269             .n(n)
2270             .k(k)
2271             .iterations(1)
2272             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2273         }
2274       }
2275     }
2276   }
2277 
// Runs the 1x16c4 neondot microkernel with n in 17..31 (more than nr = 16
// but less than 2 * nr) and k in {1, 10, 19, 28, 37}, with m = 1.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,n_gt_16)2278   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16) {
2279     TEST_REQUIRES_ARM_NEON_DOT;
2280     for (uint32_t n = 17; n < 32; n++) {
2281       for (size_t k = 1; k <= 40; k += 9) {
2282         GemmMicrokernelTester()
2283           .mr(1)
2284           .nr(16)
2285           .kr(4)
2286           .sr(1)
2287           .m(1)
2288           .n(n)
2289           .k(k)
2290           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2291       }
2292     }
2293   }
2294 
// Same n (17..31) and k ({1, 10, 19, 28, 37}) sweep as n_gt_16, with
// cn_stride(19) set on the tester. 19 differs from nr = 16; presumably this
// exercises a non-default stride between output column tiles -- confirm
// against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,n_gt_16_strided_cn)2295   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_cn) {
2296     TEST_REQUIRES_ARM_NEON_DOT;
2297     for (uint32_t n = 17; n < 32; n++) {
2298       for (size_t k = 1; k <= 40; k += 9) {
2299         GemmMicrokernelTester()
2300           .mr(1)
2301           .nr(16)
2302           .kr(4)
2303           .sr(1)
2304           .m(1)
2305           .n(n)
2306           .k(k)
2307           .cn_stride(19)
2308           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2309       }
2310     }
2311   }
2312 
// Sweeps n over 17..31, k over {1, 10, 19, 28, 37} and m over the single
// value 1, running the 1x16c4 neondot microkernel with iterations(1) for
// each combination.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,n_gt_16_subtile)2313   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_subtile) {
2314     TEST_REQUIRES_ARM_NEON_DOT;
2315     for (uint32_t n = 17; n < 32; n++) {
2316       for (size_t k = 1; k <= 40; k += 9) {
2317         for (uint32_t m = 1; m <= 1; m++) {
2318           GemmMicrokernelTester()
2319             .mr(1)
2320             .nr(16)
2321             .kr(4)
2322             .sr(1)
2323             .m(m)
2324             .n(n)
2325             .k(k)
2326             .iterations(1)
2327             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2328         }
2329       }
2330     }
2331   }
2332 
// Runs the 1x16c4 neondot microkernel with n in {32, 48} (multiples of
// nr = 16) and k in {1, 10, 19, 28, 37}, with m = 1.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,n_div_16)2333   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16) {
2334     TEST_REQUIRES_ARM_NEON_DOT;
2335     for (uint32_t n = 32; n <= 48; n += 16) {
2336       for (size_t k = 1; k <= 40; k += 9) {
2337         GemmMicrokernelTester()
2338           .mr(1)
2339           .nr(16)
2340           .kr(4)
2341           .sr(1)
2342           .m(1)
2343           .n(n)
2344           .k(k)
2345           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2346       }
2347     }
2348   }
2349 
// Same n ({32, 48}) and k ({1, 10, 19, 28, 37}) sweep as n_div_16, with
// cn_stride(19) set on the tester. 19 differs from nr = 16; presumably this
// exercises a non-default stride between output column tiles -- confirm
// against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,n_div_16_strided_cn)2350   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_cn) {
2351     TEST_REQUIRES_ARM_NEON_DOT;
2352     for (uint32_t n = 32; n <= 48; n += 16) {
2353       for (size_t k = 1; k <= 40; k += 9) {
2354         GemmMicrokernelTester()
2355           .mr(1)
2356           .nr(16)
2357           .kr(4)
2358           .sr(1)
2359           .m(1)
2360           .n(n)
2361           .k(k)
2362           .cn_stride(19)
2363           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2364       }
2365     }
2366   }
2367 
// Sweeps n over {32, 48}, k over {1, 10, 19, 28, 37} and m over the single
// value 1, running the 1x16c4 neondot microkernel with iterations(1) for
// each combination.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,n_div_16_subtile)2368   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_subtile) {
2369     TEST_REQUIRES_ARM_NEON_DOT;
2370     for (uint32_t n = 32; n <= 48; n += 16) {
2371       for (size_t k = 1; k <= 40; k += 9) {
2372         for (uint32_t m = 1; m <= 1; m++) {
2373           GemmMicrokernelTester()
2374             .mr(1)
2375             .nr(16)
2376             .kr(4)
2377             .sr(1)
2378             .m(m)
2379             .n(n)
2380             .k(k)
2381             .iterations(1)
2382             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2383         }
2384       }
2385     }
2386   }
2387 
// Runs the 1x16c4 neondot microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with ks(3) set on the tester. Presumably ks is the
// indirection kernel size (per the test name) -- confirm against
// GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,small_kernel)2388   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, small_kernel) {
2389     TEST_REQUIRES_ARM_NEON_DOT;
2390     for (size_t k = 1; k <= 40; k += 9) {
2391       GemmMicrokernelTester()
2392         .mr(1)
2393         .nr(16)
2394         .kr(4)
2395         .sr(1)
2396         .m(1)
2397         .n(16)
2398         .k(k)
2399         .ks(3)
2400         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2401     }
2402   }
2403 
// Like small_kernel (ks(3), k in {1, 10, 19, 28, 37}), but additionally
// sweeps n over 1..16 and m over the single value 1, with iterations(1) per
// configuration. Presumably ks is the indirection kernel size -- confirm
// against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,small_kernel_subtile)2404   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, small_kernel_subtile) {
2405     TEST_REQUIRES_ARM_NEON_DOT;
2406     for (size_t k = 1; k <= 40; k += 9) {
2407       for (uint32_t n = 1; n <= 16; n++) {
2408         for (uint32_t m = 1; m <= 1; m++) {
2409           GemmMicrokernelTester()
2410             .mr(1)
2411             .nr(16)
2412             .kr(4)
2413             .sr(1)
2414             .m(m)
2415             .n(n)
2416             .k(k)
2417             .ks(3)
2418             .iterations(1)
2419             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2420         }
2421       }
2422     }
2423   }
2424 
// Combines the n_gt_16 sweep (n in 17..31, k in {1, 10, 19, 28, 37}, m = 1)
// with ks(3) on the tester. Presumably ks is the indirection kernel size --
// confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,n_gt_16_small_kernel)2425   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_small_kernel) {
2426     TEST_REQUIRES_ARM_NEON_DOT;
2427     for (uint32_t n = 17; n < 32; n++) {
2428       for (size_t k = 1; k <= 40; k += 9) {
2429         GemmMicrokernelTester()
2430           .mr(1)
2431           .nr(16)
2432           .kr(4)
2433           .sr(1)
2434           .m(1)
2435           .n(n)
2436           .k(k)
2437           .ks(3)
2438           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2439       }
2440     }
2441   }
2442 
// Combines the n_div_16 sweep (n in {32, 48}, k in {1, 10, 19, 28, 37},
// m = 1) with ks(3) on the tester. Presumably ks is the indirection kernel
// size -- confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,n_div_16_small_kernel)2443   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_small_kernel) {
2444     TEST_REQUIRES_ARM_NEON_DOT;
2445     for (uint32_t n = 32; n <= 48; n += 16) {
2446       for (size_t k = 1; k <= 40; k += 9) {
2447         GemmMicrokernelTester()
2448           .mr(1)
2449           .nr(16)
2450           .kr(4)
2451           .sr(1)
2452           .m(1)
2453           .n(n)
2454           .k(k)
2455           .ks(3)
2456           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2457       }
2458     }
2459   }
2460 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..16 and m over the single
// value 1 with cm_stride(19) set on the tester and iterations(1) per
// configuration. 19 exceeds the largest n (16); presumably cm_stride is the
// output row stride -- confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,strided_cm_subtile)2461   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm_subtile) {
2462     TEST_REQUIRES_ARM_NEON_DOT;
2463     for (size_t k = 1; k <= 40; k += 9) {
2464       for (uint32_t n = 1; n <= 16; n++) {
2465         for (uint32_t m = 1; m <= 1; m++) {
2466           GemmMicrokernelTester()
2467             .mr(1)
2468             .nr(16)
2469             .kr(4)
2470             .sr(1)
2471             .m(m)
2472             .n(n)
2473             .k(k)
2474             .cm_stride(19)
2475             .iterations(1)
2476             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2477         }
2478       }
2479     }
2480   }
2481 
// Runs the 1x16c4 neondot microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with ks(3) and a_offset(43) set on the tester.
// NOTE(review): the meaning of a_offset (presumably an offset applied to the
// input pointers) is defined by GemmMicrokernelTester, not visible here.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,a_offset)2482   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, a_offset) {
2483     TEST_REQUIRES_ARM_NEON_DOT;
2484     for (size_t k = 1; k <= 40; k += 9) {
2485       GemmMicrokernelTester()
2486         .mr(1)
2487         .nr(16)
2488         .kr(4)
2489         .sr(1)
2490         .m(1)
2491         .n(16)
2492         .k(k)
2493         .ks(3)
2494         .a_offset(43)
2495         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2496     }
2497   }
2498 
// Same configuration as a_offset (ks(3), a_offset(43), k in
// {1, 10, 19, 28, 37}), plus zero_index(mz) for mz in 0..0 -- the mz loop
// bound equals mr = 1, so only index 0 is tried.
// NOTE(review): presumably zero_index selects which row uses the zero
// buffer -- confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,zero)2499   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, zero) {
2500     TEST_REQUIRES_ARM_NEON_DOT;
2501     for (size_t k = 1; k <= 40; k += 9) {
2502       for (uint32_t mz = 0; mz < 1; mz++) {
2503         GemmMicrokernelTester()
2504           .mr(1)
2505           .nr(16)
2506           .kr(4)
2507           .sr(1)
2508           .m(1)
2509           .n(16)
2510           .k(k)
2511           .ks(3)
2512           .a_offset(43)
2513           .zero_index(mz)
2514           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2515       }
2516     }
2517   }
2518 
// Runs the 1x16c4 neondot microkernel once on a full 1x16 tile with k = 8
// and qmin(128) set on the tester (presumably the lower output clamp bound
// -- confirm against GemmMicrokernelTester).
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,qmin)2519   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, qmin) {
2520     TEST_REQUIRES_ARM_NEON_DOT;
2521     GemmMicrokernelTester()
2522       .mr(1)
2523       .nr(16)
2524       .kr(4)
2525       .sr(1)
2526       .m(1)
2527       .n(16)
2528       .k(8)
2529       .qmin(128)
2530       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2531   }
2532 
// Runs the 1x16c4 neondot microkernel once on a full 1x16 tile with k = 8
// and qmax(128) set on the tester (presumably the upper output clamp bound
// -- confirm against GemmMicrokernelTester).
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,qmax)2533   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, qmax) {
2534     TEST_REQUIRES_ARM_NEON_DOT;
2535     GemmMicrokernelTester()
2536       .mr(1)
2537       .nr(16)
2538       .kr(4)
2539       .sr(1)
2540       .m(1)
2541       .n(16)
2542       .k(8)
2543       .qmax(128)
2544       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2545   }
2546 
// Runs the 1x16c4 neondot microkernel once on a full 1x16 tile with k = 8
// and cm_stride(19) set on the tester. 19 exceeds n = 16; presumably
// cm_stride is the output row stride -- confirm against
// GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,strided_cm)2547   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm) {
2548     TEST_REQUIRES_ARM_NEON_DOT;
2549     GemmMicrokernelTester()
2550       .mr(1)
2551       .nr(16)
2552       .kr(4)
2553       .sr(1)
2554       .m(1)
2555       .n(16)
2556       .k(8)
2557       .cm_stride(19)
2558       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2559   }
2560 
// Runs the 1x16c4 neondot microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with a_zero_point(0) set on the tester; the B zero
// point is left at the tester's default.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,no_a_zero_point)2561   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_a_zero_point) {
2562     TEST_REQUIRES_ARM_NEON_DOT;
2563     for (size_t k = 1; k <= 40; k += 9) {
2564       GemmMicrokernelTester()
2565         .mr(1)
2566         .nr(16)
2567         .kr(4)
2568         .sr(1)
2569         .m(1)
2570         .n(16)
2571         .k(k)
2572         .a_zero_point(0)
2573         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2574     }
2575   }
2576 
// Runs the 1x16c4 neondot microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with b_zero_point(0) set on the tester; the A zero
// point is left at the tester's default.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,no_b_zero_point)2577   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_b_zero_point) {
2578     TEST_REQUIRES_ARM_NEON_DOT;
2579     for (size_t k = 1; k <= 40; k += 9) {
2580       GemmMicrokernelTester()
2581         .mr(1)
2582         .nr(16)
2583         .kr(4)
2584         .sr(1)
2585         .m(1)
2586         .n(16)
2587         .k(k)
2588         .b_zero_point(0)
2589         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2590     }
2591   }
2592 
// Runs the 1x16c4 neondot microkernel on a full 1x16 tile for k in
// {1, 10, 19, 28, 37} with both a_zero_point(0) and b_zero_point(0) set on
// the tester.
// Gated by TEST_REQUIRES_ARM_NEON_DOT (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT,no_zero_point)2593   TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_zero_point) {
2594     TEST_REQUIRES_ARM_NEON_DOT;
2595     for (size_t k = 1; k <= 40; k += 9) {
2596       GemmMicrokernelTester()
2597         .mr(1)
2598         .nr(16)
2599         .kr(4)
2600         .sr(1)
2601         .m(1)
2602         .n(16)
2603         .k(k)
2604         .a_zero_point(0)
2605         .b_zero_point(0)
2606         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
2607     }
2608   }
2609 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
2610 
2611 
2612 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane once through
// GemmMicrokernelTester on a full 4x16 tile (mr = 4, nr = 16, kr = 1) with
// k = 8, using xnn_init_qu8_conv_minmax_fp32_neon_params for parameter
// setup.
// Gated by TEST_REQUIRES_ARM_NEON (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE,k_eq_8)2613   TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
2614     TEST_REQUIRES_ARM_NEON;
2615     GemmMicrokernelTester()
2616       .mr(4)
2617       .nr(16)
2618       .kr(1)
2619       .sr(1)
2620       .m(4)
2621       .n(16)
2622       .k(8)
2623       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
2624   }
2625 
// Runs the 4x16 neon_mlal_lane microkernel once on a full 4x16 tile with
// k = 8 and cn_stride(19) set on the tester. 19 differs from nr = 16;
// presumably this exercises a non-default stride between output column
// tiles -- confirm against GemmMicrokernelTester.
// Gated by TEST_REQUIRES_ARM_NEON (macro defined outside this chunk).
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE,strided_cn)2626   TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
2627     TEST_REQUIRES_ARM_NEON;
2628     GemmMicrokernelTester()
2629       .mr(4)
2630       .nr(16)
2631       .kr(1)
2632       .sr(1)
2633       .m(4)
2634       .n(16)
2635       .k(8)
2636       .cn_stride(19)
2637       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
2638   }
2639 
// k = 8 for every (n, m) pair with n in [1, 16] and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2657 
// k = 8, n = 16, with m swept over [1, 4]; iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2673 
// k = 8, m = 4, with n swept over [1, 16]; iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2689 
// m = 4, n = 16, with k swept over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2704 
// k in [1, 7] crossed with n in [1, 16] and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2724 
// m = 4, n = 16, with k swept over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2739 
// k in [9, 15] crossed with n in [1, 16] and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2759 
// m = 4, n = 16, with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2774 
// k in {16, 24, ..., 80} crossed with n in [1, 16] and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2794 
// m = 4, n in [17, 31], each with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2811 
// m = 4, n in [17, 31], k over 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2829 
// n in [17, 31] crossed with k over 1, 10, 19, 28, 37 and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2849 
// m = 4, n in {32, 48}, each with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2866 
// m = 4, n in {32, 48}, k over 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2884 
// n in {32, 48} crossed with k over 1, 10, 19, 28, 37 and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2904 
// m = 4, n = 16, ks = 3, with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2920 
// ks = 3 with k over 1, 10, 19, 28, 37 crossed with n in [1, 16] and
// m in [1, 4]; iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2941 
// m = 4, ks = 3, n in [17, 31], each with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2959 
// m = 4, ks = 3, n in {32, 48}, each with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2977 
// cm_stride = 19 with k over 1, 10, 19, 28, 37 crossed with n in [1, 16]
// and m in [1, 4]; iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
2998 
// m = 4, n = 16, ks = 3, a_offset = 163, with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
3015 
// ks = 3, a_offset = 163; for each k in 1, 10, 19, 28, 37 the zero_index
// setting is swept over 0..3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3035 
// m = 4, n = 16, k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
}
3049 
// m = 4, n = 16, k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
}
3063 
// m = 4, n = 16, k = 8 with cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
}
3077 
// m = 4, n = 16, a_zero_point = 0, with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
3093 
// m = 4, n = 16, b_zero_point = 0, with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
3109 
// m = 4, n = 16, both a_zero_point and b_zero_point set to 0,
// with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
3126 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3127 
3128 
3129 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One tester configuration: m = 4, n = 16, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
3142 
// m = 4, n = 16, k = 8 with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
}
3156 
// k = 8 for every (n, m) pair with n in [1, 16] and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3174 
// k = 8, n = 16, with m swept over [1, 4]; iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3190 
// k = 8, m = 4, with n swept over [1, 16]; iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3206 
// m = 4, n = 16, with k swept over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3221 
// k in [1, 7] crossed with n in [1, 16] and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
3241 
// m = 4, n = 16, with k swept over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3256 
// k in [9, 15] crossed with n in [1, 16] and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
3276 
// m = 4, n = 16, with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3291 
// k in {16, 24, ..., 80} crossed with n in [1, 16] and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
3311 
// m = 4, n in [17, 31], each with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3328 
// m = 4, n in [17, 31], k over 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3346 
// n in [17, 31] crossed with k over 1, 10, 19, 28, 37 and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
3366 
// m = 4, n in {32, 48}, each with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3383 
// m = 4, n in {32, 48}, k over 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3401 
// n in {32, 48} crossed with k over 1, 10, 19, 28, 37 and m in [1, 4];
// iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
3421 
// m = 4, n = 16, ks = 3, with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
3437 
// ks = 3 with k over 1, 10, 19, 28, 37 crossed with n in [1, 16] and
// m in [1, 4]; iterations is set to 1 for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neonv8_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
3458 
// m = 4, ks = 3, n in [17, 31], each with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3476 
// m = 4, ks = 3, n in {32, 48}, each with k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
3494 
// strided_cm_subtile: every m in [1, 4] and n in [1, 16] with cm_stride = 19
// and k in {1, 10, 19, 28, 37}; one iteration per configuration.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3515 
// a_offset: full 4x16 tile with ks = 3 and a_offset = 163 for
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3532 
// zero: full 4x16 tile with ks = 3 and a_offset = 163, repeated with
// zero_index set to each mz in [0, 3], for k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3552 
// qmin: single full 4x16 tile at k = 8 with qmin set to 128.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3566 
// qmax: single full 4x16 tile at k = 8 with qmax set to 128.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3580 
// strided_cm: single full 4x16 tile at k = 8 with cm_stride = 19.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3594 
// no_a_zero_point: full 4x16 tile with a_zero_point = 0 for
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3610 
// no_b_zero_point: full 4x16 tile with b_zero_point = 0 for
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3626 
// no_zero_point: full 4x16 tile with both a_zero_point and b_zero_point set
// to 0 for k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3643 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3644 
3645 
3646 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single full 1x4 tile (kr = 2) at k = 8 for the SSE4.1 LD64 IGEMM
// microkernel. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3659 
// strided_cn: single full 1x4 tile at k = 8 with cn_stride = 7.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3673 
// k_eq_8_subtile: k = 8 for every n in [1, 4] and m in [1, 1] (the m loop runs
// once because mr = 1); one iteration per configuration.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3691 
// k_eq_8_subtile_m: k = 8, n = 4, with m in [1, 1] (the loop runs once because
// mr = 1); one iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3707 
// k_eq_8_subtile_n: k = 8, m = 1, with n in [1, 4]; one iteration per
// configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3723 
// k_lt_8: full 1x4 tile for every k in [1, 7].
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3738 
// k_lt_8_subtile: every k in [1, 7], n in [1, 4], and m in [1, 1]; one
// iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3758 
// k_gt_8: full 1x4 tile for every k in [9, 15].
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3773 
// k_gt_8_subtile: every k in [9, 15], n in [1, 4], and m in [1, 1]; one
// iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3793 
// k_div_8: full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3808 
// k_div_8_subtile: k = 16, 24, ..., 80 for every n in [1, 4] and m in [1, 1];
// one iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3828 
// n_gt_4: m = 1 with n in [5, 7] (above nr = 4) and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3845 
// n_gt_4_strided_cn: m = 1 with n in [5, 7], cn_stride = 7, and
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3863 
// n_gt_4_subtile: n in [5, 7], k in {1, 10, 19, 28, 37}, and m in [1, 1]; one
// iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3883 
// n_div_4: m = 1 with n in {8, 12} (multiples of nr = 4) and
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3900 
// n_div_4_strided_cn: m = 1 with n in {8, 12}, cn_stride = 7, and
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3918 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, and m in [1, 1];
// one iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3938 
// small_kernel: full 1x4 tile with ks = 3 for k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3954 
// small_kernel_subtile: ks = 3 for every n in [1, 4] and m in [1, 1], with
// k in {1, 10, 19, 28, 37}; one iteration per configuration.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3975 
// n_gt_4_small_kernel: m = 1 with n in [5, 7], ks = 3, and
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3993 
// n_div_4_small_kernel: m = 1 with n in {8, 12}, ks = 3, and
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4011 
// strided_cm_subtile: cm_stride = 7 for every n in [1, 4] and m in [1, 1],
// with k in {1, 10, 19, 28, 37}; one iteration per configuration.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4032 
// a_offset: full 1x4 tile with ks = 3 and a_offset = 43 for
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4049 
// zero: full 1x4 tile with ks = 3 and a_offset = 43, with zero_index set to
// each mz in [0, 0] (the loop runs once because mr = 1), for
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4069 
// qmin: single full 1x4 tile at k = 8 with qmin set to 128.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4083 
// qmax: single full 1x4 tile at k = 8 with qmax set to 128.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4097 
// strided_cm: single full 1x4 tile at k = 8 with cm_stride = 7.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4111 
// no_a_zero_point: full 1x4 tile with a_zero_point = 0 for
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4127 
// no_b_zero_point: full 1x4 tile with b_zero_point = 0 for
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4143 
// no_zero_point: full 1x4 tile with both a_zero_point and b_zero_point set to
// 0 for k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4160 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4161 
4162 
4163 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single full 2x4 tile (kr = 2) at k = 8 for the SSE4.1 LD64 IGEMM
// microkernel. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4176 
// strided_cn: single full 2x4 tile at k = 8 with cn_stride = 7.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4190 
// k_eq_8_subtile: k = 8 for every n in [1, 4] and m in [1, 2]; one iteration
// per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4208 
// k_eq_8_subtile_m: k = 8, n = 4, with m in [1, 2]; one iteration per
// configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4224 
// k_eq_8_subtile_n: k = 8, m = 2, with n in [1, 4]; one iteration per
// configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4240 
// k_lt_8: full 2x4 tile for every k in [1, 7].
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4255 
// k_lt_8_subtile: every k in [1, 7], n in [1, 4], and m in [1, 2]; one
// iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4275 
// k_gt_8: full 2x4 tile for every k in [9, 15].
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4290 
// k_gt_8_subtile: every k in [9, 15], n in [1, 4], and m in [1, 2]; one
// iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4310 
// k_div_8: full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4325 
// k_div_8_subtile: k = 16, 24, ..., 80 for every n in [1, 4] and m in [1, 2];
// one iteration per configuration. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4345 
// n_gt_4: m = 2 with n in [5, 7] (above nr = 4) and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4362 
// n_gt_4_strided_cn: m = 2 with n in [5, 7], cn_stride = 7, and
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4380 
// 2x4c2 SSE4.1 ld64 kernel: n = 5, 6, 7 with k = 1, 10, 19, 28, 37 and every m in [1, 2];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4400 
// 2x4c2 SSE4.1 ld64 kernel: n = 8 and 12 with k = 1, 10, 19, 28, 37 and m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4417 
// 2x4c2 SSE4.1 ld64 kernel: n = 8 and 12 with k = 1, 10, 19, 28, 37, m fixed at 2,
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4435 
// 2x4c2 SSE4.1 ld64 kernel: n = 8 and 12 with k = 1, 10, 19, 28, 37 and every m in [1, 2];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4455 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4471 
// 2x4c2 SSE4.1 ld64 kernel: ks set to 3, k = 1, 10, 19, 28, 37 crossed with every
// n in [1, 4] and m in [1, 2]; a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4492 
// 2x4c2 SSE4.1 ld64 kernel: ks set to 3, n = 5, 6, 7 with k = 1, 10, 19, 28, 37
// and m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4510 
// 2x4c2 SSE4.1 ld64 kernel: ks set to 3, n = 8 and 12 with k = 1, 10, 19, 28, 37
// and m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4528 
// 2x4c2 SSE4.1 ld64 kernel: cm_stride set to 7, k = 1, 10, 19, 28, 37 crossed with
// every n in [1, 4] and m in [1, 2]; a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4549 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile with ks set to 3 and a_offset set to 83,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4566 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile with ks set to 3 and a_offset set to 83;
// zero_index takes each value in {0, 1} for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4586 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4600 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4614 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4628 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile with a_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4644 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile with b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4660 
// 2x4c2 SSE4.1 ld64 kernel: full 2x4 tile with both a_zero_point and b_zero_point
// set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4677 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4678 
4679 
4680 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4693 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
4707 
// 3x4c2 SSE2 ld64 kernel: k = 8 with every n in [1, 4] and m in [1, 3];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4725 
// 3x4c2 SSE2 ld64 kernel: k = 8, n fixed at 4, every m in [1, 3];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4741 
// 3x4c2 SSE2 ld64 kernel: k = 8, m fixed at 3, every n in [1, 4];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4757 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4772 
// 3x4c2 SSE2 ld64 kernel: every k in [1, 7] crossed with every n in [1, 4]
// and m in [1, 3]; a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4792 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4807 
// 3x4c2 SSE2 ld64 kernel: every k in [9, 15] crossed with every n in [1, 4]
// and m in [1, 3]; a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4827 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4842 
// 3x4c2 SSE2 ld64 kernel: k = 16, 24, ..., 80 crossed with every n in [1, 4]
// and m in [1, 3]; a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4862 
// 3x4c2 SSE2 ld64 kernel: n = 5, 6, 7 with k = 1, 10, 19, 28, 37 and m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4879 
// 3x4c2 SSE2 ld64 kernel: n = 5, 6, 7 with k = 1, 10, 19, 28, 37, m fixed at 3,
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4897 
// 3x4c2 SSE2 ld64 kernel: n = 5, 6, 7 with k = 1, 10, 19, 28, 37 and every m in [1, 3];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4917 
// 3x4c2 SSE2 ld64 kernel: n = 8 and 12 with k = 1, 10, 19, 28, 37 and m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4934 
// 3x4c2 SSE2 ld64 kernel: n = 8 and 12 with k = 1, 10, 19, 28, 37, m fixed at 3,
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
4952 
// 3x4c2 SSE2 ld64 kernel: n = 8 and 12 with k = 1, 10, 19, 28, 37 and every m in [1, 3];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
4972 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
4988 
// 3x4c2 SSE2 ld64 kernel: ks set to 3, k = 1, 10, 19, 28, 37 crossed with every
// n in [1, 4] and m in [1, 3]; a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5009 
// 3x4c2 SSE2 ld64 kernel: ks set to 3, n = 5, 6, 7 with k = 1, 10, 19, 28, 37
// and m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5027 
// 3x4c2 SSE2 ld64 kernel: ks set to 3, n = 8 and 12 with k = 1, 10, 19, 28, 37
// and m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5045 
// 3x4c2 SSE2 ld64 kernel: cm_stride set to 7, k = 1, 10, 19, 28, 37 crossed with
// every n in [1, 4] and m in [1, 3]; a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5066 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile with ks set to 3 and a_offset set to 127,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5083 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile with ks set to 3 and a_offset set to 127;
// zero_index takes each value in {0, 1, 2} for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5103 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5117 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5131 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5145 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile with a_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5161 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile with b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5177 
// 3x4c2 SSE2 ld64 kernel: full 3x4 tile with both a_zero_point and b_zero_point
// set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5194 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5195 
5196 
5197 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 SSE4.1 ld64 kernel: full 4x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5210 
// 4x4c2 SSE4.1 ld64 kernel: full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5224 
// 4x4c2 SSE4.1 ld64 kernel: k = 8 with every n in [1, 4] and m in [1, 4];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5242 
// 4x4c2 SSE4.1 ld64 kernel: k = 8, n fixed at 4, every m in [1, 4];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5258 
// 4x4c2 SSE4.1 ld64 kernel: k = 8, m fixed at 4, every n in [1, 4];
// a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5274 
// k_lt_8: m = 4, n = 4, with k taking every value from 1 through 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5289 
// k_lt_8_subtile: k in [1, 7] crossed with n in [1, 4] and m in [1, 4],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5309 
// k_gt_8: m = 4, n = 4, with k taking every value from 9 through 15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5324 
// k_gt_8_subtile: k in [9, 15] crossed with n in [1, 4] and m in [1, 4],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5344 
// k_div_8: m = 4, n = 4, with k stepping by 8 from 16 up to and including 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5359 
// k_div_8_subtile: k in {16, 24, ..., 80} crossed with n in [1, 4] and m in [1, 4],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5379 
// n_gt_4: m = 4; n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5396 
// n_gt_4_strided_cn: m = 4; n in [5, 7] crossed with k in {1, 10, 19, 28, 37},
// each with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5414 
// n_gt_4_subtile: n in [5, 7], k in {1, 10, 19, 28, 37} and m in [1, 4],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5434 
// n_div_4: m = 4; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5451 
// n_div_4_strided_cn: m = 4; n in {8, 12} crossed with k in {1, 10, 19, 28, 37},
// each with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5469 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 4],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5489 
// small_kernel: m = 4, n = 4, ks(3), with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5505 
// small_kernel_subtile: ks(3); k in {1, 10, 19, 28, 37} crossed with n in [1, 4]
// and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5526 
// n_gt_4_small_kernel: m = 4, ks(3); n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5544 
// n_div_4_small_kernel: m = 4, ks(3); n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5562 
// strided_cm_subtile: cm_stride(7); k in {1, 10, 19, 28, 37} crossed with n in [1, 4]
// and m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5583 
// a_offset: m = 4, n = 4, ks(3) and a_offset(163), with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5600 
// zero: m = 4, n = 4, ks(3), a_offset(163); k in {1, 10, 19, 28, 37} crossed with
// zero_index(mz) for mz in [0, 3].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(k);
      tester.ks(3).a_offset(163).zero_index(mz);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5620 
// qmin: single run with m = 4, n = 4, k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5634 
// qmax: single run with m = 4, n = 4, k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5648 
// strided_cm: single run with m = 4, n = 4, k = 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5662 
// no_a_zero_point: m = 4, n = 4, a_zero_point(0), with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5678 
// no_b_zero_point: m = 4, n = 4, b_zero_point(0), with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5694 
// no_zero_point: m = 4, n = 4, a_zero_point(0) and b_zero_point(0),
// with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5711 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5712 
5713 
5714 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m == mr (2), n == nr (4) and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5727 
// strided_cn: single run with m = 2, n = 4, k = 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5741 
// k_eq_8_subtile: k fixed at 8; every (n, m) pair with n in [1, 4] and m in [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m).n(n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5759 
// k_eq_8_subtile_m: k = 8 and n = 4 held fixed while m runs over [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5775 
// k_eq_8_subtile_n: k = 8 and m = 2 held fixed while n runs over [1, 4],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5791 
// k_lt_8: m = 2, n = 4, with k taking every value from 1 through 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5806 
// k_lt_8_subtile: k in [1, 7] crossed with n in [1, 4] and m in [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5826 
// k_gt_8: m = 2, n = 4, with k taking every value from 9 through 15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5841 
// k_gt_8_subtile: k in [9, 15] crossed with n in [1, 4] and m in [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5861 
// k_div_8: m = 2, n = 4, with k stepping by 8 from 16 up to and including 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5876 
// k_div_8_subtile: k in {16, 24, ..., 80} crossed with n in [1, 4] and m in [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5896 
// n_gt_4: m = 2; n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5913 
// n_gt_4_strided_cn: m = 2; n in [5, 7] crossed with k in {1, 10, 19, 28, 37},
// each with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5931 
// n_gt_4_subtile: n in [5, 7], k in {1, 10, 19, 28, 37} and m in [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
5951 
// n_div_4: m = 2; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5968 
// n_div_4_strided_cn: m = 2; n in {8, 12} crossed with k in {1, 10, 19, 28, 37},
// each with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5986 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6006 
// small_kernel: m = 2, n = 4, ks(3), with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6022 
// small_kernel_subtile: ks(3); k in {1, 10, 19, 28, 37} crossed with n in [1, 4]
// and m in [1, 2], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6043 
// n_gt_4_small_kernel: m = 2, ks(3); n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6061 
// n_div_4_small_kernel: m = 2, ks(3); n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6079 
// strided_cm_subtile: cm_stride(7); k in {1, 10, 19, 28, 37} crossed with n in [1, 4]
// and m in [1, 2], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6100 
// a_offset: m = 2, n = 4, ks(3) and a_offset(83), with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6117 
// zero: m = 2, n = 4, ks(3), a_offset(83); k in {1, 10, 19, 28, 37} crossed with
// zero_index(mz) for mz in [0, 1].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(k);
      tester.ks(3).a_offset(83).zero_index(mz);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6137 
// qmin: single run with m = 2, n = 4, k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6151 
// qmax: m=2, n=4, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6165 
// strided_cm: m=2, n=4, k=8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6179 
// no_a_zero_point: m=2, n=4 with a_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6195 
// no_b_zero_point: m=2, n=4 with b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6211 
// no_zero_point: m=2, n=4 with both a_zero_point(0) and b_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0).b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6228 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6229 
6230 
6231 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: m=2, n=4, k=8 with no extra options.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6244 
// strided_cn: m=2, n=4, k=8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6258 
// k_eq_8_subtile: k=8, every (n, m) with n in 1..4 and m in 1..2,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6276 
// k_eq_8_subtile_m: k=8, n=4, m swept over 1..2, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6292 
// k_eq_8_subtile_n: k=8, m=2, n swept over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6308 
// k_lt_8: m=2, n=4, k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6323 
// k_lt_8_subtile: k in 1..7 crossed with n in 1..4 and m in 1..2,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6343 
// k_gt_8: m=2, n=4, k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6358 
// k_gt_8_subtile: k in 9..15 crossed with n in 1..4 and m in 1..2,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6378 
// k_div_8: m=2, n=4, k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6393 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..2,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6413 
// n_gt_4: m=2, n = 5, 6, 7, each with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6430 
// n_gt_4_strided_cn: m=2, n = 5, 6, 7, k = 1, 10, 19, 28, 37, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6448 
// n_gt_4_subtile: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m in 1..2,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6468 
// n_div_4: m=2, n = 8 and 12, each with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6485 
// n_div_4_strided_cn: m=2, n = 8 and 12, k = 1, 10, 19, 28, 37, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6503 
// n_div_4_subtile: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..2,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6523 
// small_kernel: m=2, n=4 with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6539 
// small_kernel_subtile: ks(3) with k = 1, 10, 19, 28, 37 crossed with
// n in 1..4 and m in 1..2, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6560 
// n_gt_4_small_kernel: m=2, n = 5, 6, 7, k = 1, 10, 19, 28, 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6578 
// n_div_4_small_kernel: m=2, n = 8 and 12, k = 1, 10, 19, 28, 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6596 
// strided_cm_subtile: cm_stride(7) with k = 1, 10, 19, 28, 37 crossed with
// n in 1..4 and m in 1..2, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6617 
// a_offset: m=2, n=4 with ks(3) and a_offset(83), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3).a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6634 
// zero: same setup as a_offset (ks(3), a_offset(83)), additionally passing
// zero_index 0 and 1 for each k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3).a_offset(83).zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6654 
// qmin: m=2, n=4, k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6668 
// qmax: m=2, n=4, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6682 
// strided_cm: m=2, n=4, k=8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6696 
// no_a_zero_point: m=2, n=4 with a_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6712 
// no_b_zero_point: m=2, n=4 with b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6728 
// no_zero_point: m=2, n=4 with both a_zero_point(0) and b_zero_point(0),
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0).b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6745 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6746 
6747 
6748 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: m=3, n=4, k=8 with no extra options.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6761 
// strided_cn: m=3, n=4, k=8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6775 
// k_eq_8_subtile: k=8, every (n, m) with n in 1..4 and m in 1..3,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6793 
// k_eq_8_subtile_m: k=8, n=4, m swept over 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6809 
// k_eq_8_subtile_n: k=8, m=3, n swept over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6825 
// k_lt_8: m=3, n=4, k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6840 
// k_lt_8_subtile: k in 1..7 crossed with n in 1..4 and m in 1..3,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6860 
// k_gt_8: m=3, n=4, k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6875 
// k_gt_8_subtile: k in 9..15 crossed with n in 1..4 and m in 1..3,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6895 
// k_div_8: m=3, n=4, k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6910 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..3,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6930 
// n_gt_4: m=3, n = 5, 6, 7, each with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6947 
// n_gt_4_strided_cn: m=3, n = 5, 6, 7, k = 1, 10, 19, 28, 37, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6965 
// n_gt_4_subtile: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m in 1..3,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
6985 
// n_div_4: m=3, n = 8 and 12, each with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7002 
// n_div_4_strided_cn: m=3, n = 8 and 12, k = 1, 10, 19, 28, 37, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
7020 
// n_div_4_subtile: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..3,
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
7040 
// small_kernel: m=3, n=4 with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7056 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3; ks = 3, one iteration each.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).ks(3).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7077 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // n in 5..7, k in {1, 10, 19, 28, 37}; m = 3 and ks = 3 throughout.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_value).k(k_value).ks(3);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7095 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 3 and ks = 3 throughout.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_value).k(k_value).ks(3);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7113 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3; cm_stride = 7,
  // one iteration per case.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).cm_stride(7).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7134 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  // m = 3, n = 4, ks = 3, a_offset = 127; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value).ks(3).a_offset(127);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7151 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  // k in {1, 10, 19, 28, 37}; zero_index in 0..2.
  // Fixed: m = 3, n = 4, ks = 3, a_offset = 127.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_value).ks(3);
      tester.a_offset(127).zero_index(zero_idx);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7171 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Single run: m = 3, n = 4, k = 8 with qmin = 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7185 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Single run: m = 3, n = 4, k = 8 with qmax = 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7199 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // Single run: m = 3, n = 4, k = 8 with cm_stride = 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7213 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // m = 3, n = 4, a_zero_point = 0; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value).a_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7229 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // m = 3, n = 4, b_zero_point = 0; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value).b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7245 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // m = 3, n = 4, a_zero_point = 0 and b_zero_point = 0;
  // k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7262 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7263 
7264 
7265 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single run: m = 3, n = 4, k = 8, no additional tester options.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7278 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Single run: m = 3, n = 4, k = 8 with cn_stride = 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7292 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 8; n in 1..4 and m in 1..3, one iteration per (n, m) pair.
  for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
    for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_value).n(n_value).k(8).iterations(1);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7310 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // n = 4, k = 8; m in 1..3, one iteration per value.
  for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_value).n(4).k(8).iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7326 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, k = 8; n in 1..4, one iteration per value.
  for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_value).k(8).iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7342 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4; k takes every value in 1..7.
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7357 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k in 1..7, n in 1..4, m in 1..3; one iteration per case.
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7377 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4; k takes every value in 9..15.
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7392 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k in 9..15, n in 1..4, m in 1..3; one iteration per case.
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7412 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4; k takes the multiples of 8 from 16 through 80.
  for (size_t k_value = 16; k_value <= 80; k_value += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7427 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k in {16, 24, ..., 80}, n in 1..4, m in 1..3; one iteration per case.
  for (size_t k_value = 16; k_value <= 80; k_value += 8) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7447 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7, k in {1, 10, 19, 28, 37}; m = 3 throughout.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_value).k(k_value);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7464 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7, k in {1, 10, 19, 28, 37}; m = 3 and cn_stride = 7 throughout.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_value).k(k_value).cn_stride(7);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7482 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..3; one iteration per case.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7502 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 3 throughout.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_value).k(k_value);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7519 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 3 and cn_stride = 7 throughout.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_value).k(k_value).cn_stride(7);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7537 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..3; one iteration per case.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7557 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, ks = 3; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value).ks(3);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7573 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3; ks = 3, one iteration each.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).ks(3).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7594 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7, k in {1, 10, 19, 28, 37}; m = 3 and ks = 3 throughout.
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_value).k(k_value).ks(3);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7612 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}; m = 3 and ks = 3 throughout.
  for (uint32_t n_value = 8; n_value <= 12; n_value += 4) {
    for (size_t k_value = 1; k_value <= 40; k_value += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_value).k(k_value).ks(3);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7630 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3; cm_stride = 7,
  // one iteration per case.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 3; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).cm_stride(7).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7651 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, ks = 3, a_offset = 127; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value).ks(3).a_offset(127);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7668 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  // k in {1, 10, 19, 28, 37}; zero_index in 0..2.
  // Fixed: m = 3, n = 4, ks = 3, a_offset = 127.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_value).ks(3);
      tester.a_offset(127).zero_index(zero_idx);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7688 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Single run: m = 3, n = 4, k = 8 with qmin = 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7702 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Single run: m = 3, n = 4, k = 8 with qmax = 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7716 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Single run: m = 3, n = 4, k = 8 with cm_stride = 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7730 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, a_zero_point = 0; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value).a_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7746 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, b_zero_point = 0; k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value).b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7762 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, a_zero_point = 0 and b_zero_point = 0;
  // k takes 1, 10, 19, 28, 37.
  for (size_t k_value = 1; k_value <= 40; k_value += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_value);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7779 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7780 
7781 
7782 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run: m = 4, n = 4, k = 8, no additional tester options.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7795 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // Single run: m = 4, n = 4, k = 8 with cn_stride = 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
    xnn_init_qu8_conv_minmax_fp32_sse2_params,
    xnn_qu8_requantize_fp32);
}
7809 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 8; n in 1..4 and m in 1..4, one iteration per (n, m) pair.
  for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
    for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_value).n(n_value).k(8).iterations(1);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
7827 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  // n = 4, k = 8; m in 1..4, one iteration per value.
  for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_value).n(4).k(8).iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7843 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  // m = 4, k = 8; n in 1..4, one iteration per value.
  for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_value).k(8).iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7859 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // m = 4, n = 4; k takes every value in 1..7.
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_value);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7874 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in 1..7, n in 1..4, m in 1..4; one iteration per case.
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7894 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // m = 4, n = 4; k takes every value in 9..15.
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_value);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7909 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in 9..15, n in 1..4, m in 1..4; one iteration per case.
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7929 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  // m = 4, n = 4; k takes the multiples of 8 from 16 through 80.
  for (size_t k_value = 16; k_value <= 80; k_value += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_value);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
  }
}
7944 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in {16, 24, ..., 80}, n in 1..4, m in 1..4; one iteration per case.
  for (size_t k_value = 16; k_value <= 80; k_value += 8) {
    for (uint32_t n_value = 1; n_value <= 4; ++n_value) {
      for (uint32_t m_value = 1; m_value <= 4; ++m_value) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_value).n(n_value).k(k_value).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
7964 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,n_gt_4)7965   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
    // Sweeps n over 5..7 (greater than nr=4, less than 8) and k over
    // 1, 10, 19, 28, 37, with m=4.
7966     TEST_REQUIRES_X86_AVX;
7967     for (uint32_t n = 5; n < 8; n++) {
7968       for (size_t k = 1; k <= 40; k += 9) {
7969         GemmMicrokernelTester()
7970           .mr(4)
7971           .nr(4)
7972           .kr(2)
7973           .sr(1)
7974           .m(4)
7975           .n(n)
7976           .k(k)
7977           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
7978       }
7979     }
7980   }
7981 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,n_gt_4_strided_cn)7982   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
    // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=4 and
    // cn_stride(7).
    // NOTE(review): cn_stride is presumably the element stride between
    // consecutive nr-wide output column blocks — confirm in
    // gemm-microkernel-tester.h.
7983     TEST_REQUIRES_X86_AVX;
7984     for (uint32_t n = 5; n < 8; n++) {
7985       for (size_t k = 1; k <= 40; k += 9) {
7986         GemmMicrokernelTester()
7987           .mr(4)
7988           .nr(4)
7989           .kr(2)
7990           .sr(1)
7991           .m(4)
7992           .n(n)
7993           .k(k)
7994           .cn_stride(7)
7995           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
7996       }
7997     }
7998   }
7999 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,n_gt_4_subtile)8000   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
    // Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..4;
    // each combination runs with iterations(1).
8001     TEST_REQUIRES_X86_AVX;
8002     for (uint32_t n = 5; n < 8; n++) {
8003       for (size_t k = 1; k <= 40; k += 9) {
8004         for (uint32_t m = 1; m <= 4; m++) {
8005           GemmMicrokernelTester()
8006             .mr(4)
8007             .nr(4)
8008             .kr(2)
8009             .sr(1)
8010             .m(m)
8011             .n(n)
8012             .k(k)
8013             .iterations(1)
8014             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8015         }
8016       }
8017     }
8018   }
8019 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,n_div_4)8020   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
    // Sweeps n over 8 and 12 (multiples of nr=4 above nr) and k over
    // 1, 10, 19, 28, 37, with m=4.
8021     TEST_REQUIRES_X86_AVX;
8022     for (uint32_t n = 8; n <= 12; n += 4) {
8023       for (size_t k = 1; k <= 40; k += 9) {
8024         GemmMicrokernelTester()
8025           .mr(4)
8026           .nr(4)
8027           .kr(2)
8028           .sr(1)
8029           .m(4)
8030           .n(n)
8031           .k(k)
8032           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8033       }
8034     }
8035   }
8036 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,n_div_4_strided_cn)8037   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
    // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=4 and
    // cn_stride(7).
    // NOTE(review): cn_stride is presumably the element stride between
    // consecutive nr-wide output column blocks — confirm in
    // gemm-microkernel-tester.h.
8038     TEST_REQUIRES_X86_AVX;
8039     for (uint32_t n = 8; n <= 12; n += 4) {
8040       for (size_t k = 1; k <= 40; k += 9) {
8041         GemmMicrokernelTester()
8042           .mr(4)
8043           .nr(4)
8044           .kr(2)
8045           .sr(1)
8046           .m(4)
8047           .n(n)
8048           .k(k)
8049           .cn_stride(7)
8050           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8051       }
8052     }
8053   }
8054 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,n_div_4_subtile)8055   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
    // Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..4;
    // each combination runs with iterations(1).
8056     TEST_REQUIRES_X86_AVX;
8057     for (uint32_t n = 8; n <= 12; n += 4) {
8058       for (size_t k = 1; k <= 40; k += 9) {
8059         for (uint32_t m = 1; m <= 4; m++) {
8060           GemmMicrokernelTester()
8061             .mr(4)
8062             .nr(4)
8063             .kr(2)
8064             .sr(1)
8065             .m(m)
8066             .n(n)
8067             .k(k)
8068             .iterations(1)
8069             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8070         }
8071       }
8072     }
8073   }
8074 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,small_kernel)8075   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and ks(3).
    // NOTE(review): ks is presumably the kernel size, i.e. the number of
    // indirection entries per output — confirm in
    // gemm-microkernel-tester.h.
8076     TEST_REQUIRES_X86_AVX;
8077     for (size_t k = 1; k <= 40; k += 9) {
8078       GemmMicrokernelTester()
8079         .mr(4)
8080         .nr(4)
8081         .kr(2)
8082         .sr(1)
8083         .m(4)
8084         .n(4)
8085         .k(k)
8086         .ks(3)
8087         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8088     }
8089   }
8090 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,small_kernel_subtile)8091   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel_subtile) {
    // Sweeps k over 1, 10, 19, 28, 37 crossed with n in 1..4 and m in
    // 1..4, with ks(3); each combination runs with iterations(1).
    // NOTE(review): ks is presumably the kernel size — confirm in
    // gemm-microkernel-tester.h.
8092     TEST_REQUIRES_X86_AVX;
8093     for (size_t k = 1; k <= 40; k += 9) {
8094       for (uint32_t n = 1; n <= 4; n++) {
8095         for (uint32_t m = 1; m <= 4; m++) {
8096           GemmMicrokernelTester()
8097             .mr(4)
8098             .nr(4)
8099             .kr(2)
8100             .sr(1)
8101             .m(m)
8102             .n(n)
8103             .k(k)
8104             .ks(3)
8105             .iterations(1)
8106             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8107         }
8108       }
8109     }
8110   }
8111 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,n_gt_4_small_kernel)8112   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_small_kernel) {
    // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=4 and ks(3).
    // NOTE(review): ks is presumably the kernel size — confirm in
    // gemm-microkernel-tester.h.
8113     TEST_REQUIRES_X86_AVX;
8114     for (uint32_t n = 5; n < 8; n++) {
8115       for (size_t k = 1; k <= 40; k += 9) {
8116         GemmMicrokernelTester()
8117           .mr(4)
8118           .nr(4)
8119           .kr(2)
8120           .sr(1)
8121           .m(4)
8122           .n(n)
8123           .k(k)
8124           .ks(3)
8125           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8126       }
8127     }
8128   }
8129 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,n_div_4_small_kernel)8130   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_small_kernel) {
    // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=4 and
    // ks(3).
    // NOTE(review): ks is presumably the kernel size — confirm in
    // gemm-microkernel-tester.h.
8131     TEST_REQUIRES_X86_AVX;
8132     for (uint32_t n = 8; n <= 12; n += 4) {
8133       for (size_t k = 1; k <= 40; k += 9) {
8134         GemmMicrokernelTester()
8135           .mr(4)
8136           .nr(4)
8137           .kr(2)
8138           .sr(1)
8139           .m(4)
8140           .n(n)
8141           .k(k)
8142           .ks(3)
8143           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8144       }
8145     }
8146   }
8147 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,strided_cm_subtile)8148   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
    // Sweeps k over 1, 10, 19, 28, 37 crossed with n in 1..4 and m in
    // 1..4, with cm_stride(7); each combination runs with iterations(1).
    // NOTE(review): cm_stride is presumably the element stride between
    // output rows — confirm in gemm-microkernel-tester.h.
8149     TEST_REQUIRES_X86_AVX;
8150     for (size_t k = 1; k <= 40; k += 9) {
8151       for (uint32_t n = 1; n <= 4; n++) {
8152         for (uint32_t m = 1; m <= 4; m++) {
8153           GemmMicrokernelTester()
8154             .mr(4)
8155             .nr(4)
8156             .kr(2)
8157             .sr(1)
8158             .m(m)
8159             .n(n)
8160             .k(k)
8161             .cm_stride(7)
8162             .iterations(1)
8163             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8164         }
8165       }
8166     }
8167   }
8168 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,a_offset)8169   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, a_offset) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4, ks(3) and
    // a_offset(163).
    // NOTE(review): a_offset is presumably an offset applied to the input
    // pointers taken from the indirection buffer — confirm in
    // gemm-microkernel-tester.h.
8170     TEST_REQUIRES_X86_AVX;
8171     for (size_t k = 1; k <= 40; k += 9) {
8172       GemmMicrokernelTester()
8173         .mr(4)
8174         .nr(4)
8175         .kr(2)
8176         .sr(1)
8177         .m(4)
8178         .n(4)
8179         .k(k)
8180         .ks(3)
8181         .a_offset(163)
8182         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8183     }
8184   }
8185 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,zero)8186   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, zero) {
    // Sweeps k over 1, 10, 19, 28, 37 and mz over 0..3 with m=4, n=4,
    // ks(3), a_offset(163) and zero_index(mz).
    // NOTE(review): zero_index presumably selects which row's input
    // pointers are replaced by the zero buffer — confirm in
    // gemm-microkernel-tester.h.
8187     TEST_REQUIRES_X86_AVX;
8188     for (size_t k = 1; k <= 40; k += 9) {
8189       for (uint32_t mz = 0; mz < 4; mz++) {
8190         GemmMicrokernelTester()
8191           .mr(4)
8192           .nr(4)
8193           .kr(2)
8194           .sr(1)
8195           .m(4)
8196           .n(4)
8197           .k(k)
8198           .ks(3)
8199           .a_offset(163)
8200           .zero_index(mz)
8201           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8202       }
8203     }
8204   }
8205 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,qmin)8206   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
    // Single run with m=4, n=4, k=8 and qmin(128).
    // NOTE(review): qmin is presumably the lower output clamping bound —
    // confirm in gemm-microkernel-tester.h.
8207     TEST_REQUIRES_X86_AVX;
8208     GemmMicrokernelTester()
8209       .mr(4)
8210       .nr(4)
8211       .kr(2)
8212       .sr(1)
8213       .m(4)
8214       .n(4)
8215       .k(8)
8216       .qmin(128)
8217       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8218   }
8219 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,qmax)8220   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
    // Single run with m=4, n=4, k=8 and qmax(128).
    // NOTE(review): qmax is presumably the upper output clamping bound —
    // confirm in gemm-microkernel-tester.h.
8221     TEST_REQUIRES_X86_AVX;
8222     GemmMicrokernelTester()
8223       .mr(4)
8224       .nr(4)
8225       .kr(2)
8226       .sr(1)
8227       .m(4)
8228       .n(4)
8229       .k(8)
8230       .qmax(128)
8231       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8232   }
8233 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,strided_cm)8234   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
    // Single run with m=4, n=4, k=8 and cm_stride(7).
    // NOTE(review): cm_stride is presumably the element stride between
    // output rows — confirm in gemm-microkernel-tester.h.
8235     TEST_REQUIRES_X86_AVX;
8236     GemmMicrokernelTester()
8237       .mr(4)
8238       .nr(4)
8239       .kr(2)
8240       .sr(1)
8241       .m(4)
8242       .n(4)
8243       .k(8)
8244       .cm_stride(7)
8245       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8246   }
8247 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,no_a_zero_point)8248   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_a_zero_point) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and the tester's
    // a_zero_point set to 0.
8249     TEST_REQUIRES_X86_AVX;
8250     for (size_t k = 1; k <= 40; k += 9) {
8251       GemmMicrokernelTester()
8252         .mr(4)
8253         .nr(4)
8254         .kr(2)
8255         .sr(1)
8256         .m(4)
8257         .n(4)
8258         .k(k)
8259         .a_zero_point(0)
8260         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8261     }
8262   }
8263 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,no_b_zero_point)8264   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_b_zero_point) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and the tester's
    // b_zero_point set to 0.
8265     TEST_REQUIRES_X86_AVX;
8266     for (size_t k = 1; k <= 40; k += 9) {
8267       GemmMicrokernelTester()
8268         .mr(4)
8269         .nr(4)
8270         .kr(2)
8271         .sr(1)
8272         .m(4)
8273         .n(4)
8274         .k(k)
8275         .b_zero_point(0)
8276         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8277     }
8278   }
8279 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64,no_zero_point)8280   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_zero_point) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and both
    // a_zero_point and b_zero_point set to 0.
8281     TEST_REQUIRES_X86_AVX;
8282     for (size_t k = 1; k <= 40; k += 9) {
8283       GemmMicrokernelTester()
8284         .mr(4)
8285         .nr(4)
8286         .kr(2)
8287         .sr(1)
8288         .m(4)
8289         .n(4)
8290         .k(k)
8291         .a_zero_point(0)
8292         .b_zero_point(0)
8293         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8294     }
8295   }
8296 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8297 
8298 
8299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_eq_8)8300   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
    // Single run of the 4x4c2 XOP ld64 ukernel with m=4, n=4 (matching
    // mr and nr) and k=8.
8301     TEST_REQUIRES_X86_XOP;
8302     GemmMicrokernelTester()
8303       .mr(4)
8304       .nr(4)
8305       .kr(2)
8306       .sr(1)
8307       .m(4)
8308       .n(4)
8309       .k(8)
8310       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8311   }
8312 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,strided_cn)8313   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
    // Single run with m=4, n=4, k=8 and cn_stride(7).
    // NOTE(review): cn_stride is presumably the element stride between
    // consecutive nr-wide output column blocks — confirm in
    // gemm-microkernel-tester.h.
8314     TEST_REQUIRES_X86_XOP;
8315     GemmMicrokernelTester()
8316       .mr(4)
8317       .nr(4)
8318       .kr(2)
8319       .sr(1)
8320       .m(4)
8321       .n(4)
8322       .k(8)
8323       .cn_stride(7)
8324       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8325   }
8326 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_eq_8_subtile)8327   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
    // With k fixed at 8, sweeps every n in 1..4 and m in 1..4; each
    // combination runs with iterations(1).
8328     TEST_REQUIRES_X86_XOP;
8329     for (uint32_t n = 1; n <= 4; n++) {
8330       for (uint32_t m = 1; m <= 4; m++) {
8331         GemmMicrokernelTester()
8332           .mr(4)
8333           .nr(4)
8334           .kr(2)
8335           .sr(1)
8336           .m(m)
8337           .n(n)
8338           .k(8)
8339           .iterations(1)
8340           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8341       }
8342     }
8343   }
8344 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_eq_8_subtile_m)8345   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
    // With k=8 and n=4 fixed, sweeps m over 1..4; each value runs with
    // iterations(1).
8346     TEST_REQUIRES_X86_XOP;
8347     for (uint32_t m = 1; m <= 4; m++) {
8348       GemmMicrokernelTester()
8349         .mr(4)
8350         .nr(4)
8351         .kr(2)
8352         .sr(1)
8353         .m(m)
8354         .n(4)
8355         .k(8)
8356         .iterations(1)
8357         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8358     }
8359   }
8360 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_eq_8_subtile_n)8361   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
    // With k=8 and m=4 fixed, sweeps n over 1..4; each value runs with
    // iterations(1).
8362     TEST_REQUIRES_X86_XOP;
8363     for (uint32_t n = 1; n <= 4; n++) {
8364       GemmMicrokernelTester()
8365         .mr(4)
8366         .nr(4)
8367         .kr(2)
8368         .sr(1)
8369         .m(4)
8370         .n(n)
8371         .k(8)
8372         .iterations(1)
8373         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8374     }
8375   }
8376 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_lt_8)8377   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
    // Sweeps k over 1..7 (every value below 8) with m=4 and n=4.
8378     TEST_REQUIRES_X86_XOP;
8379     for (size_t k = 1; k < 8; k++) {
8380       GemmMicrokernelTester()
8381         .mr(4)
8382         .nr(4)
8383         .kr(2)
8384         .sr(1)
8385         .m(4)
8386         .n(4)
8387         .k(k)
8388         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8389     }
8390   }
8391 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_lt_8_subtile)8392   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
    // Sweeps k over 1..7 crossed with every n in 1..4 and m in 1..4;
    // each combination runs with iterations(1).
8393     TEST_REQUIRES_X86_XOP;
8394     for (size_t k = 1; k < 8; k++) {
8395       for (uint32_t n = 1; n <= 4; n++) {
8396         for (uint32_t m = 1; m <= 4; m++) {
8397           GemmMicrokernelTester()
8398             .mr(4)
8399             .nr(4)
8400             .kr(2)
8401             .sr(1)
8402             .m(m)
8403             .n(n)
8404             .k(k)
8405             .iterations(1)
8406             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8407         }
8408       }
8409     }
8410   }
8411 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_gt_8)8412   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
    // Sweeps k over 9..15 (every value strictly between 8 and 16) with
    // m=4 and n=4.
8413     TEST_REQUIRES_X86_XOP;
8414     for (size_t k = 9; k < 16; k++) {
8415       GemmMicrokernelTester()
8416         .mr(4)
8417         .nr(4)
8418         .kr(2)
8419         .sr(1)
8420         .m(4)
8421         .n(4)
8422         .k(k)
8423         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8424     }
8425   }
8426 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_gt_8_subtile)8427   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
    // Sweeps k over 9..15 crossed with every n in 1..4 and m in 1..4;
    // each combination runs with iterations(1).
8428     TEST_REQUIRES_X86_XOP;
8429     for (size_t k = 9; k < 16; k++) {
8430       for (uint32_t n = 1; n <= 4; n++) {
8431         for (uint32_t m = 1; m <= 4; m++) {
8432           GemmMicrokernelTester()
8433             .mr(4)
8434             .nr(4)
8435             .kr(2)
8436             .sr(1)
8437             .m(m)
8438             .n(n)
8439             .k(k)
8440             .iterations(1)
8441             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8442         }
8443       }
8444     }
8445   }
8446 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_div_8)8447   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
    // Sweeps k over the multiples of 8 from 16 to 80 inclusive, with
    // m=4 and n=4.
8448     TEST_REQUIRES_X86_XOP;
8449     for (size_t k = 16; k <= 80; k += 8) {
8450       GemmMicrokernelTester()
8451         .mr(4)
8452         .nr(4)
8453         .kr(2)
8454         .sr(1)
8455         .m(4)
8456         .n(4)
8457         .k(k)
8458         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8459     }
8460   }
8461 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,k_div_8_subtile)8462   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
    // Sweeps k over the multiples of 8 from 16 to 80, crossed with every
    // n in 1..4 and m in 1..4; each combination runs with iterations(1).
8463     TEST_REQUIRES_X86_XOP;
8464     for (size_t k = 16; k <= 80; k += 8) {
8465       for (uint32_t n = 1; n <= 4; n++) {
8466         for (uint32_t m = 1; m <= 4; m++) {
8467           GemmMicrokernelTester()
8468             .mr(4)
8469             .nr(4)
8470             .kr(2)
8471             .sr(1)
8472             .m(m)
8473             .n(n)
8474             .k(k)
8475             .iterations(1)
8476             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8477         }
8478       }
8479     }
8480   }
8481 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,n_gt_4)8482   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
    // Sweeps n over 5..7 (greater than nr=4, less than 8) and k over
    // 1, 10, 19, 28, 37, with m=4.
8483     TEST_REQUIRES_X86_XOP;
8484     for (uint32_t n = 5; n < 8; n++) {
8485       for (size_t k = 1; k <= 40; k += 9) {
8486         GemmMicrokernelTester()
8487           .mr(4)
8488           .nr(4)
8489           .kr(2)
8490           .sr(1)
8491           .m(4)
8492           .n(n)
8493           .k(k)
8494           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8495       }
8496     }
8497   }
8498 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,n_gt_4_strided_cn)8499   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
    // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=4 and
    // cn_stride(7).
    // NOTE(review): cn_stride is presumably the element stride between
    // consecutive nr-wide output column blocks — confirm in
    // gemm-microkernel-tester.h.
8500     TEST_REQUIRES_X86_XOP;
8501     for (uint32_t n = 5; n < 8; n++) {
8502       for (size_t k = 1; k <= 40; k += 9) {
8503         GemmMicrokernelTester()
8504           .mr(4)
8505           .nr(4)
8506           .kr(2)
8507           .sr(1)
8508           .m(4)
8509           .n(n)
8510           .k(k)
8511           .cn_stride(7)
8512           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8513       }
8514     }
8515   }
8516 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,n_gt_4_subtile)8517   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
    // Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..4;
    // each combination runs with iterations(1).
8518     TEST_REQUIRES_X86_XOP;
8519     for (uint32_t n = 5; n < 8; n++) {
8520       for (size_t k = 1; k <= 40; k += 9) {
8521         for (uint32_t m = 1; m <= 4; m++) {
8522           GemmMicrokernelTester()
8523             .mr(4)
8524             .nr(4)
8525             .kr(2)
8526             .sr(1)
8527             .m(m)
8528             .n(n)
8529             .k(k)
8530             .iterations(1)
8531             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8532         }
8533       }
8534     }
8535   }
8536 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,n_div_4)8537   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
    // Sweeps n over 8 and 12 (multiples of nr=4 above nr) and k over
    // 1, 10, 19, 28, 37, with m=4.
8538     TEST_REQUIRES_X86_XOP;
8539     for (uint32_t n = 8; n <= 12; n += 4) {
8540       for (size_t k = 1; k <= 40; k += 9) {
8541         GemmMicrokernelTester()
8542           .mr(4)
8543           .nr(4)
8544           .kr(2)
8545           .sr(1)
8546           .m(4)
8547           .n(n)
8548           .k(k)
8549           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8550       }
8551     }
8552   }
8553 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,n_div_4_strided_cn)8554   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
    // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=4 and
    // cn_stride(7).
    // NOTE(review): cn_stride is presumably the element stride between
    // consecutive nr-wide output column blocks — confirm in
    // gemm-microkernel-tester.h.
8555     TEST_REQUIRES_X86_XOP;
8556     for (uint32_t n = 8; n <= 12; n += 4) {
8557       for (size_t k = 1; k <= 40; k += 9) {
8558         GemmMicrokernelTester()
8559           .mr(4)
8560           .nr(4)
8561           .kr(2)
8562           .sr(1)
8563           .m(4)
8564           .n(n)
8565           .k(k)
8566           .cn_stride(7)
8567           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8568       }
8569     }
8570   }
8571 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,n_div_4_subtile)8572   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
    // Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..4;
    // each combination runs with iterations(1).
8573     TEST_REQUIRES_X86_XOP;
8574     for (uint32_t n = 8; n <= 12; n += 4) {
8575       for (size_t k = 1; k <= 40; k += 9) {
8576         for (uint32_t m = 1; m <= 4; m++) {
8577           GemmMicrokernelTester()
8578             .mr(4)
8579             .nr(4)
8580             .kr(2)
8581             .sr(1)
8582             .m(m)
8583             .n(n)
8584             .k(k)
8585             .iterations(1)
8586             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8587         }
8588       }
8589     }
8590   }
8591 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,small_kernel)8592   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and ks(3).
    // NOTE(review): ks is presumably the kernel size, i.e. the number of
    // indirection entries per output — confirm in
    // gemm-microkernel-tester.h.
8593     TEST_REQUIRES_X86_XOP;
8594     for (size_t k = 1; k <= 40; k += 9) {
8595       GemmMicrokernelTester()
8596         .mr(4)
8597         .nr(4)
8598         .kr(2)
8599         .sr(1)
8600         .m(4)
8601         .n(4)
8602         .k(k)
8603         .ks(3)
8604         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8605     }
8606   }
8607 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,small_kernel_subtile)8608   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel_subtile) {
    // Sweeps k over 1, 10, 19, 28, 37 crossed with n in 1..4 and m in
    // 1..4, with ks(3); each combination runs with iterations(1).
    // NOTE(review): ks is presumably the kernel size — confirm in
    // gemm-microkernel-tester.h.
8609     TEST_REQUIRES_X86_XOP;
8610     for (size_t k = 1; k <= 40; k += 9) {
8611       for (uint32_t n = 1; n <= 4; n++) {
8612         for (uint32_t m = 1; m <= 4; m++) {
8613           GemmMicrokernelTester()
8614             .mr(4)
8615             .nr(4)
8616             .kr(2)
8617             .sr(1)
8618             .m(m)
8619             .n(n)
8620             .k(k)
8621             .ks(3)
8622             .iterations(1)
8623             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8624         }
8625       }
8626     }
8627   }
8628 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,n_gt_4_small_kernel)8629   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
    // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=4 and ks(3).
    // NOTE(review): ks is presumably the kernel size — confirm in
    // gemm-microkernel-tester.h.
8630     TEST_REQUIRES_X86_XOP;
8631     for (uint32_t n = 5; n < 8; n++) {
8632       for (size_t k = 1; k <= 40; k += 9) {
8633         GemmMicrokernelTester()
8634           .mr(4)
8635           .nr(4)
8636           .kr(2)
8637           .sr(1)
8638           .m(4)
8639           .n(n)
8640           .k(k)
8641           .ks(3)
8642           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8643       }
8644     }
8645   }
8646 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,n_div_4_small_kernel)8647   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_small_kernel) {
    // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=4 and
    // ks(3).
    // NOTE(review): ks is presumably the kernel size — confirm in
    // gemm-microkernel-tester.h.
8648     TEST_REQUIRES_X86_XOP;
8649     for (uint32_t n = 8; n <= 12; n += 4) {
8650       for (size_t k = 1; k <= 40; k += 9) {
8651         GemmMicrokernelTester()
8652           .mr(4)
8653           .nr(4)
8654           .kr(2)
8655           .sr(1)
8656           .m(4)
8657           .n(n)
8658           .k(k)
8659           .ks(3)
8660           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8661       }
8662     }
8663   }
8664 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,strided_cm_subtile)8665   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
    // Sweeps k over 1, 10, 19, 28, 37 crossed with n in 1..4 and m in
    // 1..4, with cm_stride(7); each combination runs with iterations(1).
    // NOTE(review): cm_stride is presumably the element stride between
    // output rows — confirm in gemm-microkernel-tester.h.
8666     TEST_REQUIRES_X86_XOP;
8667     for (size_t k = 1; k <= 40; k += 9) {
8668       for (uint32_t n = 1; n <= 4; n++) {
8669         for (uint32_t m = 1; m <= 4; m++) {
8670           GemmMicrokernelTester()
8671             .mr(4)
8672             .nr(4)
8673             .kr(2)
8674             .sr(1)
8675             .m(m)
8676             .n(n)
8677             .k(k)
8678             .cm_stride(7)
8679             .iterations(1)
8680             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8681         }
8682       }
8683     }
8684   }
8685 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,a_offset)8686   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, a_offset) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4, ks(3) and
    // a_offset(163).
    // NOTE(review): a_offset is presumably an offset applied to the input
    // pointers taken from the indirection buffer — confirm in
    // gemm-microkernel-tester.h.
8687     TEST_REQUIRES_X86_XOP;
8688     for (size_t k = 1; k <= 40; k += 9) {
8689       GemmMicrokernelTester()
8690         .mr(4)
8691         .nr(4)
8692         .kr(2)
8693         .sr(1)
8694         .m(4)
8695         .n(4)
8696         .k(k)
8697         .ks(3)
8698         .a_offset(163)
8699         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8700     }
8701   }
8702 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,zero)8703   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, zero) {
    // Sweeps k over 1, 10, 19, 28, 37 and mz over 0..3 with m=4, n=4,
    // ks(3), a_offset(163) and zero_index(mz).
    // NOTE(review): zero_index presumably selects which row's input
    // pointers are replaced by the zero buffer — confirm in
    // gemm-microkernel-tester.h.
8704     TEST_REQUIRES_X86_XOP;
8705     for (size_t k = 1; k <= 40; k += 9) {
8706       for (uint32_t mz = 0; mz < 4; mz++) {
8707         GemmMicrokernelTester()
8708           .mr(4)
8709           .nr(4)
8710           .kr(2)
8711           .sr(1)
8712           .m(4)
8713           .n(4)
8714           .k(k)
8715           .ks(3)
8716           .a_offset(163)
8717           .zero_index(mz)
8718           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8719       }
8720     }
8721   }
8722 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,qmin)8723   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
    // Single run with m=4, n=4, k=8 and qmin(128).
    // NOTE(review): qmin is presumably the lower output clamping bound —
    // confirm in gemm-microkernel-tester.h.
8724     TEST_REQUIRES_X86_XOP;
8725     GemmMicrokernelTester()
8726       .mr(4)
8727       .nr(4)
8728       .kr(2)
8729       .sr(1)
8730       .m(4)
8731       .n(4)
8732       .k(8)
8733       .qmin(128)
8734       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8735   }
8736 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,qmax)8737   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
    // Single run with m=4, n=4, k=8 and qmax(128).
    // NOTE(review): qmax is presumably the upper output clamping bound —
    // confirm in gemm-microkernel-tester.h.
8738     TEST_REQUIRES_X86_XOP;
8739     GemmMicrokernelTester()
8740       .mr(4)
8741       .nr(4)
8742       .kr(2)
8743       .sr(1)
8744       .m(4)
8745       .n(4)
8746       .k(8)
8747       .qmax(128)
8748       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8749   }
8750 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,strided_cm)8751   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
    // Single run with m=4, n=4, k=8 and cm_stride(7).
    // NOTE(review): cm_stride is presumably the element stride between
    // output rows — confirm in gemm-microkernel-tester.h.
8752     TEST_REQUIRES_X86_XOP;
8753     GemmMicrokernelTester()
8754       .mr(4)
8755       .nr(4)
8756       .kr(2)
8757       .sr(1)
8758       .m(4)
8759       .n(4)
8760       .k(8)
8761       .cm_stride(7)
8762       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8763   }
8764 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,no_a_zero_point)8765   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_a_zero_point) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and the tester's
    // a_zero_point set to 0.
8766     TEST_REQUIRES_X86_XOP;
8767     for (size_t k = 1; k <= 40; k += 9) {
8768       GemmMicrokernelTester()
8769         .mr(4)
8770         .nr(4)
8771         .kr(2)
8772         .sr(1)
8773         .m(4)
8774         .n(4)
8775         .k(k)
8776         .a_zero_point(0)
8777         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8778     }
8779   }
8780 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,no_b_zero_point)8781   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_b_zero_point) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and the tester's
    // b_zero_point set to 0.
8782     TEST_REQUIRES_X86_XOP;
8783     for (size_t k = 1; k <= 40; k += 9) {
8784       GemmMicrokernelTester()
8785         .mr(4)
8786         .nr(4)
8787         .kr(2)
8788         .sr(1)
8789         .m(4)
8790         .n(4)
8791         .k(k)
8792         .b_zero_point(0)
8793         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8794     }
8795   }
8796 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64,no_zero_point)8797   TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_zero_point) {
    // Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and both
    // a_zero_point and b_zero_point set to 0.
8798     TEST_REQUIRES_X86_XOP;
8799     for (size_t k = 1; k <= 40; k += 9) {
8800       GemmMicrokernelTester()
8801         .mr(4)
8802         .nr(4)
8803         .kr(2)
8804         .sr(1)
8805         .m(4)
8806         .n(4)
8807         .k(k)
8808         .a_zero_point(0)
8809         .b_zero_point(0)
8810         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8811     }
8812   }
8813 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8814 
8815 
8816 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128,k_eq_8)8817   TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
    // Single run of the 3x4c2 SSE2 ld128 ukernel with m=3, n=4 (matching
    // mr and nr) and k=8.
8818     TEST_REQUIRES_X86_SSE2;
8819     GemmMicrokernelTester()
8820       .mr(3)
8821       .nr(4)
8822       .kr(2)
8823       .sr(1)
8824       .m(3)
8825       .n(4)
8826       .k(8)
8827       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8828   }
8829 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128,strided_cn)8830   TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
    // Single run with m=3, n=4, k=8 and cn_stride(7).
    // NOTE(review): cn_stride is presumably the element stride between
    // consecutive nr-wide output column blocks — confirm in
    // gemm-microkernel-tester.h.
8831     TEST_REQUIRES_X86_SSE2;
8832     GemmMicrokernelTester()
8833       .mr(3)
8834       .nr(4)
8835       .kr(2)
8836       .sr(1)
8837       .m(3)
8838       .n(4)
8839       .k(8)
8840       .cn_stride(7)
8841       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8842   }
8843 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128,k_eq_8_subtile)8844   TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
    // With k fixed at 8, sweeps every n in 1..4 and m in 1..3; each
    // combination runs with iterations(1).
8845     TEST_REQUIRES_X86_SSE2;
8846     for (uint32_t n = 1; n <= 4; n++) {
8847       for (uint32_t m = 1; m <= 3; m++) {
8848         GemmMicrokernelTester()
8849           .mr(3)
8850           .nr(4)
8851           .kr(2)
8852           .sr(1)
8853           .m(m)
8854           .n(n)
8855           .k(8)
8856           .iterations(1)
8857           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
8858       }
8859     }
8860   }
8861 
// Varies m over [1, 3] while holding n=4 and k=8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 3; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8877 
// Varies n over [1, 4] while holding m=3 and k=8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8893 
// Sweeps every k in [1, 7] at m=3, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8908 
// For every k in [1, 7], runs each (n, m) pair with n in [1, 4] and m in
// [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8928 
// Sweeps every k in [9, 15] at m=3, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8943 
// For every k in [9, 15], runs each (n, m) pair with n in [1, 4] and m in
// [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8963 
// Sweeps k over the multiples of 8 from 16 through 80 at m=3, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8978 
// For each multiple of 8 from 16 through 80 used as k, runs every (n, m) pair
// with n in [1, 4] and m in [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
8998 
// For n in [5, 7], sweeps k over 1, 10, 19, 28, 37 at m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9015 
// For n in [5, 7], sweeps k over 1, 10, 19, 28, 37 at m=3 with cn_stride set
// to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9033 
// For n in [5, 7] and k over 1, 10, 19, 28, 37, runs every m in [1, 3] using
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9053 
// For n = 8 and n = 12, sweeps k over 1, 10, 19, 28, 37 at m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9070 
// For n = 8 and n = 12, sweeps k over 1, 10, 19, 28, 37 at m=3 with cn_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9088 
// For n = 8 and n = 12 and k over 1, 10, 19, 28, 37, runs every m in [1, 3]
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9108 
// Sweeps k over 1, 10, 19, 28, 37 at m=3, n=4 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9124 
// For k over 1, 10, 19, 28, 37, runs every (n, m) pair with n in [1, 4] and
// m in [1, 3], with ks set to 3 and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9145 
// For n in [5, 7], sweeps k over 1, 10, 19, 28, 37 at m=3 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9163 
// For n = 8 and n = 12, sweeps k over 1, 10, 19, 28, 37 at m=3 with ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9181 
// For k over 1, 10, 19, 28, 37, runs every (n, m) pair with n in [1, 4] and
// m in [1, 3], with cm_stride set to 7 and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9202 
// Sweeps k over 1, 10, 19, 28, 37 at m=3, n=4 with ks set to 3 and a_offset
// set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9219 
// For k over 1, 10, 19, 28, 37, runs with zero_index set to each of 0, 1, 2
// at m=3, n=4, ks=3 and a_offset=127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9239 
// Single run at m=3, n=4, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
9253 
// Single run at m=3, n=4, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
9267 
// Single run at m=3, n=4, k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
9281 
// Sweeps k over 1, 10, 19, 28, 37 at m=3, n=4 with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9297 
// Sweeps k over 1, 10, 19, 28, 37 at m=3, n=4 with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9313 
// Sweeps k over 1, 10, 19, 28, 37 at m=3, n=4 with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9330 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9331 
9332 
9333 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=3, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
9346 
// Single run at m=3, n=4, k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
9360 
// Runs every (n, m) pair with n in [1, 4] and m in [1, 3] at k=8, using
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9378 
// Varies m over [1, 3] while holding n=4 and k=8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 3; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9394 
// Varies n over [1, 4] while holding m=3 and k=8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9410 
// Sweeps every k in [1, 7] at m=3, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9425 
// For every k in [1, 7], runs each (n, m) pair with n in [1, 4] and m in
// [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9445 
// Sweeps every k in [9, 15] at m=3, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9460 
// For every k in [9, 15], runs each (n, m) pair with n in [1, 4] and m in
// [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9480 
// Sweeps k over the multiples of 8 from 16 through 80 at m=3, n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9495 
// For each multiple of 8 from 16 through 80 used as k, runs every (n, m) pair
// with n in [1, 4] and m in [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9515 
// For n in [5, 7], sweeps k over 1, 10, 19, 28, 37 at m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9532 
// For n in [5, 7], sweeps k over 1, 10, 19, 28, 37 at m=3 with cn_stride set
// to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9550 
// For n in [5, 7] and k over 1, 10, 19, 28, 37, runs every m in [1, 3] using
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9570 
// For n = 8 and n = 12, sweeps k over 1, 10, 19, 28, 37 at m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9587 
// For n = 8 and n = 12, sweeps k over 1, 10, 19, 28, 37 at m=3 with cn_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9605 
// For n = 8 and n = 12 and k over 1, 10, 19, 28, 37, runs every m in [1, 3]
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9625 
// Sweeps k over 1, 10, 19, 28, 37 at m=3, n=4 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9641 
// For k over 1, 10, 19, 28, 37, runs every (n, m) pair with n in [1, 4] and
// m in [1, 3], with ks set to 3 and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9662 
// For n in [5, 7], sweeps k over 1, 10, 19, 28, 37 at m=3 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9680 
// For n = 8 and n = 12, sweeps k over 1, 10, 19, 28, 37 at m=3 with ks set
// to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9698 
// For k over 1, 10, 19, 28, 37, runs every (n, m) pair with n in [1, 4] and
// m in [1, 3], with cm_stride set to 7 and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
9719 
// Sweeps k over 1, 10, 19, 28, 37 at m=3, n=4 with ks set to 3 and a_offset
// set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9736 
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with ks = 3 and a_offset = 127;
// k takes {1, 10, 19, 28, 37} and zero_index takes 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9756 
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9770 
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9784 
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9798 
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with a_zero_point = 0;
// k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9814 
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with b_zero_point = 0;
// k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9830 
// 3x4c2 SSE4.1 LD128 kernel on a 3x4 tile with a_zero_point = 0 and
// b_zero_point = 0; k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9847 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9848 
9849 
9850 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9863 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
9877 
// 4x4c2 SSE4.1 LD128 kernel with k = 8 and a single iteration per case;
// n takes 1..4 and m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
9895 
// 4x4c2 SSE4.1 LD128 kernel with n = 4, k = 8 and a single iteration per case;
// m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9911 
// 4x4c2 SSE4.1 LD128 kernel with m = 4, k = 8 and a single iteration per case;
// n takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9927 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile; k takes 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9942 
// 4x4c2 SSE4.1 LD128 kernel with a single iteration per case;
// k takes 1..7, n takes 1..4 and m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9962 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile; k takes 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
9977 
// 4x4c2 SSE4.1 LD128 kernel with a single iteration per case;
// k takes 9..15, n takes 1..4 and m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
9997 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile; k takes 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10012 
// 4x4c2 SSE4.1 LD128 kernel with a single iteration per case;
// k takes 16, 24, ..., 80, n takes 1..4 and m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10032 
// 4x4c2 SSE4.1 LD128 kernel with m = 4;
// n takes 5..7 and k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10049 
// 4x4c2 SSE4.1 LD128 kernel with m = 4 and cn_stride = 7;
// n takes 5..7 and k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10067 
// 4x4c2 SSE4.1 LD128 kernel with a single iteration per case;
// n takes 5..7, k takes {1, 10, 19, 28, 37} and m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10087 
// 4x4c2 SSE4.1 LD128 kernel with m = 4;
// n takes {8, 12} and k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10104 
// 4x4c2 SSE4.1 LD128 kernel with m = 4 and cn_stride = 7;
// n takes {8, 12} and k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10122 
// 4x4c2 SSE4.1 LD128 kernel with a single iteration per case;
// n takes {8, 12}, k takes {1, 10, 19, 28, 37} and m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10142 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with ks = 3;
// k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10158 
// 4x4c2 SSE4.1 LD128 kernel with ks = 3 and a single iteration per case;
// k takes {1, 10, 19, 28, 37}, n takes 1..4 and m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10179 
// 4x4c2 SSE4.1 LD128 kernel with m = 4 and ks = 3;
// n takes 5..7 and k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10197 
// 4x4c2 SSE4.1 LD128 kernel with m = 4 and ks = 3;
// n takes {8, 12} and k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10215 
// 4x4c2 SSE4.1 LD128 kernel with cm_stride = 7 and a single iteration per case;
// k takes {1, 10, 19, 28, 37}, n takes 1..4 and m takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10236 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with ks = 3 and a_offset = 163;
// k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10253 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with ks = 3 and a_offset = 163;
// k takes {1, 10, 19, 28, 37} and zero_index takes 0..3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10273 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10287 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10301 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10315 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with a_zero_point = 0;
// k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10331 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with b_zero_point = 0;
// k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10347 
// 4x4c2 SSE4.1 LD128 kernel on a 4x4 tile with a_zero_point = 0 and
// b_zero_point = 0; k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10364 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10365 
10366 
10367 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 XOP LD128 kernel on a 1x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10380 
// 1x4c2 XOP LD128 kernel on a 1x4 tile with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10394 
// 1x4c2 XOP LD128 kernel with k = 8 and a single iteration per case;
// n takes 1..4 and m takes only the value 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10412 
// 1x4c2 XOP LD128 kernel with n = 4, k = 8 and a single iteration per case;
// m takes only the value 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10428 
// 1x4c2 XOP LD128 kernel with m = 1, k = 8 and a single iteration per case;
// n takes 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10444 
// 1x4c2 XOP LD128 kernel on a 1x4 tile; k takes 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10459 
// 1x4c2 XOP LD128 kernel with a single iteration per case;
// k takes 1..7, n takes 1..4 and m takes only the value 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10479 
// 1x4c2 XOP LD128 kernel on a 1x4 tile; k takes 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10494 
// 1x4c2 XOP LD128 kernel with a single iteration per case;
// k takes 9..15, n takes 1..4 and m takes only the value 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10514 
// 1x4c2 XOP LD128 kernel on a 1x4 tile; k takes 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10529 
// 1x4c2 XOP LD128 kernel with a single iteration per case;
// k takes 16, 24, ..., 80, n takes 1..4 and m takes only the value 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10549 
// 1x4c2 XOP LD128 kernel with m = 1;
// n takes 5..7 and k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10566 
// 1x4c2 XOP LD128 kernel with m = 1 and cn_stride = 7;
// n takes 5..7 and k takes {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10584 
// 1x4c2 XOP LD128 kernel with a single iteration per case;
// n takes 5..7, k takes {1, 10, 19, 28, 37} and m takes only the value 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10604 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
  // n takes 8 and 12 (multiples of nr = 4); k takes 1, 10, 19, 28, 37; m stays at 1.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 16; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10621 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
  // n takes 8 and 12; k takes 1, 10, 19, 28, 37; cn_stride is set to 7.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 16; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10639 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..1; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 16; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10659 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel) {
  // Full 1x4 tile with ks set to 3; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10675 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel_subtile) {
  // k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1; ks is 3 and iterations is 1.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10696 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
  // n takes 5, 6, 7; k takes 1, 10, 19, 28, 37; ks is set to 3.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10714 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_small_kernel) {
  // n takes 8 and 12; k takes 1, 10, 19, 28, 37; ks is set to 3.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 16; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10732 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
  // k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1; cm_stride is 7 and iterations is 1.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10753 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, a_offset) {
  // Full 1x4 tile with ks = 3 and a_offset = 43; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10770 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, zero) {
  // As a_offset (ks = 3, a_offset = 43), additionally passing zero_index 0
  // (the only index below mr = 1) for each k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t zero_row = 0; zero_row != 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10790 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
  // Single m=1, n=4, k=8 case with qmin set to 128.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10804 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
  // Single m=1, n=4, k=8 case with qmax set to 128.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10818 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
  // Single m=1, n=4, k=8 case with cm_stride set to 7.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10832 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_a_zero_point) {
  // Full 1x4 tile with a_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10848 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_b_zero_point) {
  // Full 1x4 tile with b_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10864 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_zero_point) {
  // Full 1x4 tile with both a_zero_point and b_zero_point set to 0;
  // k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10881 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10882 
10883 
10884 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
  // Single full-tile case: m=2, n=4, k=8.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10897 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
  // Single m=2, n=4, k=8 case with cn_stride set to 7.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
10911 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
  // k fixed at 8; n in 1..4 and m in 1..2; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 3; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
10929 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
  // n=4 and k=8 fixed; m in 1..2; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows < 3; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10945 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
  // m=2 and k=8 fixed; n in 1..4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10961 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
  // Full 2x4 tile for every k in 1..7.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
10976 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
  // k in 1..7, n in 1..4, m in 1..2; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
10996 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
  // Full 2x4 tile for every k in 9..15.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11011 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
  // k in 9..15, n in 1..4, m in 1..2; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11031 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
  // Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth < 81; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11046 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80; n in 1..4; m in 1..2; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth < 81; depth += 8) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11066 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
  // n takes 5, 6, 7 (above nr = 4); k takes 1, 10, 19, 28, 37; m stays at 2.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11083 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
  // n takes 5, 6, 7; k takes 1, 10, 19, 28, 37; cn_stride is set to 7.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11101 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
  // n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11121 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
  // n takes 8 and 12 (multiples of nr = 4); k takes 1, 10, 19, 28, 37; m stays at 2.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 16; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11138 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
  // n takes 8 and 12; k takes 1, 10, 19, 28, 37; cn_stride is set to 7.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 16; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11156 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2; iterations is set to 1 per case.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 16; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11176 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel) {
  // Full 2x4 tile with ks set to 3; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11192 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel_subtile) {
  // k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2; ks is 3 and iterations is 1.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11213 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
  // n takes 5, 6, 7; k takes 1, 10, 19, 28, 37; ks is set to 3.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11231 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_small_kernel) {
  // n takes 8 and 12; k takes 1, 10, 19, 28, 37; ks is set to 3.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 16; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11249 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
  // k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2; cm_stride is 7 and iterations is 1.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
11270 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, a_offset) {
  // Full 2x4 tile with ks = 3 and a_offset = 83; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11287 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, zero) {
  // As a_offset (ks = 3, a_offset = 83), additionally passing each zero_index
  // below mr = 2 (0 and 1) for every k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t zero_row = 0; zero_row != 2; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_row)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11307 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
  // Single m=2, n=4, k=8 case with qmin set to 128.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11321 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
  // Single m=2, n=4, k=8 case with qmax set to 128.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11335 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
  // Single m=2, n=4, k=8 case with cm_stride set to 7.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11349 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_a_zero_point) {
  // Full 2x4 tile with a_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11365 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_b_zero_point) {
  // Full 2x4 tile with b_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11381 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_zero_point) {
  // Full 2x4 tile with both a_zero_point and b_zero_point set to 0;
  // k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11398 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11399 
11400 
11401 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
  // Single full-tile case: m=3, n=4, k=8.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11414 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
  // Single m=3, n=4, k=8 case with cn_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
11428 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
  // k fixed at 8; n in 1..4 and m in 1..3; iterations is set to 1 per case.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 4; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
11446 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
  // n=4 and k=8 fixed; m in 1..3; iterations is set to 1 per case.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows < 4; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11462 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
  // m=3 and k=8 fixed; n in 1..4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
11478 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4, sweeping k over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11493 
// 3x4c2 AVX ld128 kernel, k over 1..7 crossed with every (m, n) in
// 1..3 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
11513 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4, sweeping k over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11528 
// 3x4c2 AVX ld128 kernel, k over 9..15 crossed with every (m, n) in
// 1..3 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
11548 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4, sweeping k over the
// multiples of 8 from 16 to 80 inclusive.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11563 
// 3x4c2 AVX ld128 kernel, k over multiples of 8 in 16..80 crossed with
// every (m, n) in 1..3 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
11583 
// 3x4c2 AVX ld128 kernel with m = 3, n over 5..7 (above nr = 4),
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
11600 
// 3x4c2 AVX ld128 kernel with m = 3, n over 5..7, k over 1, 10, 19, 28, 37,
// and cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
11618 
// 3x4c2 AVX ld128 kernel with n over 5..7, k over 1, 10, 19, 28, 37 and
// m over 1..3; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
11638 
// 3x4c2 AVX ld128 kernel with m = 3, n = 8 and 12 (multiples of nr = 4),
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
11655 
// 3x4c2 AVX ld128 kernel with m = 3, n = 8 and 12, k over 1, 10, 19, 28, 37,
// and cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
11673 
// 3x4c2 AVX ld128 kernel with n = 8 and 12, k over 1, 10, 19, 28, 37 and
// m over 1..3; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
11693 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4, ks(3) set on the tester,
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11709 
// 3x4c2 AVX ld128 kernel with ks(3), k over 1, 10, 19, 28, 37 crossed with
// every (m, n) in 1..3 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
11730 
// 3x4c2 AVX ld128 kernel with m = 3, ks(3), n over 5..7,
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
11748 
// 3x4c2 AVX ld128 kernel with m = 3, ks(3), n = 8 and 12,
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
11766 
// 3x4c2 AVX ld128 kernel with cm_stride(7), k over 1, 10, 19, 28, 37 crossed
// with every (m, n) in 1..3 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
11787 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4, ks(3) and a_offset(127),
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11804 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4, ks(3) and a_offset(127),
// k over 1, 10, 19, 28, 37, with zero_index swept over 0..2 (one per row
// of the mr = 3 tile).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
11824 
// 3x4c2 AVX ld128 kernel, m == mr and n == nr at k = 8, with qmin(128)
// set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
11838 
// 3x4c2 AVX ld128 kernel, m == mr and n == nr at k = 8, with qmax(128)
// set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
11852 
// 3x4c2 AVX ld128 kernel, m == mr and n == nr at k = 8, with cm_stride(7)
// set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
11866 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4 and a_zero_point(0),
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11882 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4 and b_zero_point(0),
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11898 
// 3x4c2 AVX ld128 kernel with m = 3, n = 4 and both a_zero_point(0) and
// b_zero_point(0), k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11915 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11916 
11917 
11918 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 XOP ld128 kernel, m == mr and n == nr at k = 8.
// TEST_REQUIRES_X86_XOP gates the case on the XOP ISA check (macro is
// defined outside this file).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
11931 
// 4x4c2 XOP ld128 kernel, m == mr and n == nr at k = 8, with cn_stride(7)
// set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
11945 
// 4x4c2 XOP ld128 kernel at k = 8, sweeping every (m, n) with
// 1 <= m <= 4 and 1 <= n <= 4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
11963 
// 4x4c2 XOP ld128 kernel at k = 8 and n = 4, sweeping m over 1..4;
// one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11979 
// 4x4c2 XOP ld128 kernel at k = 8 and m = 4, sweeping n over 1..4;
// one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11995 
// 4x4c2 XOP ld128 kernel with m = 4, n = 4, sweeping k over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
12010 
// 4x4c2 XOP ld128 kernel, k over 1..7 crossed with every (m, n) in
// 1..4 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12030 
// 4x4c2 XOP ld128 kernel with m = 4, n = 4, sweeping k over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
12045 
// 4x4c2 XOP ld128 kernel, k over 9..15 crossed with every (m, n) in
// 1..4 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12065 
// 4x4c2 XOP ld128 kernel with m = 4, n = 4, sweeping k over the
// multiples of 8 from 16 to 80 inclusive.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
12080 
// 4x4c2 XOP ld128 kernel, k over multiples of 8 in 16..80 crossed with
// every (m, n) in 1..4 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12100 
// 4x4c2 XOP ld128 kernel with m = 4, n over 5..7 (above nr = 4),
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12117 
// 4x4c2 XOP ld128 kernel with m = 4, n over 5..7, k over 1, 10, 19, 28, 37,
// and cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12135 
// 4x4c2 XOP ld128 kernel with n over 5..7, k over 1, 10, 19, 28, 37 and
// m over 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12155 
// 4x4c2 XOP ld128 kernel with m = 4, n = 8 and 12 (multiples of nr = 4),
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12172 
// 4x4c2 XOP ld128 kernel with m = 4, n = 8 and 12, k over 1, 10, 19, 28, 37,
// and cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12190 
// 4x4c2 XOP ld128 kernel with n = 8 and 12, k over 1, 10, 19, 28, 37 and
// m over 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12210 
// 4x4c2 XOP ld128 kernel with m = 4, n = 4, ks(3) set on the tester,
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
12226 
// 4x4c2 XOP ld128 kernel with ks(3), k over 1, 10, 19, 28, 37 crossed with
// every (m, n) in 1..4 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12247 
// 4x4c2 XOP ld128 kernel with m = 4, ks(3), n over 5..7,
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12265 
// 4x4c2 XOP ld128 kernel with m = 4, ks(3), n = 8 and 12,
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12283 
// 4x4c2 XOP ld128 kernel with cm_stride(7), k over 1, 10, 19, 28, 37 crossed
// with every (m, n) in 1..4 x 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
12304 
// 4x4c2 XOP ld128 kernel with m = 4, n = 4, ks(3) and a_offset(163),
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
12321 
// 4x4c2 XOP ld128 kernel with m = 4, n = 4, ks(3) and a_offset(163),
// k over 1, 10, 19, 28, 37, with zero_index swept over 0..3 (one per row
// of the mr = 4 tile).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
12341 
// qmin: single m=4, n=4, k=8 run with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12355 
// qmax: single m=4, n=4, k=8 run with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12369 
// strided_cm: single m=4, n=4, k=8 run with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12383 
// no_a_zero_point: for k in {1, 10, 19, 28, 37}, runs the m=4, n=4 case with
// a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12399 
// no_b_zero_point: for k in {1, 10, 19, 28, 37}, runs the m=4, n=4 case with
// b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12415 
// no_zero_point: for k in {1, 10, 19, 28, 37}, runs the m=4, n=4 case with
// both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12432 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12433 
12434 
12435 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m=2, n=4 (equal to mr and nr) and k=8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12448 
// strided_cn: single m=2, n=4, k=8 run with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12462 
// k_eq_8_subtile: with k fixed at 8, runs once for every n in [1, 4] and
// every m in [1, 2].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12480 
// k_eq_8_subtile_m: with n=4 and k=8 fixed, runs once for every m in [1, 2].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12496 
// k_eq_8_subtile_n: with m=2 and k=8 fixed, runs once for every n in [1, 4].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12512 
// k_lt_8: runs the m=2, n=4 case for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12527 
// k_lt_8_subtile: for every k in [1, 7], n in [1, 4] and m in [1, 2], runs
// the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12547 
// k_gt_8: runs the m=2, n=4 case for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12562 
// k_gt_8_subtile: for every k in [9, 15], n in [1, 4] and m in [1, 2], runs
// the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12582 
// k_div_8: runs the m=2, n=4 case for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12597 
// k_div_8_subtile: for k = 16, 24, ..., 80, every n in [1, 4] and every m in
// [1, 2], runs the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12617 
// n_gt_4: for every n in [5, 7] and k in {1, 10, 19, 28, 37}, runs the m=2
// case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12634 
// n_gt_4_strided_cn: for every n in [5, 7] and k in {1, 10, 19, 28, 37}, runs
// the m=2 case with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12652 
// n_gt_4_subtile: for every n in [5, 7], k in {1, 10, 19, 28, 37} and m in
// [1, 2], runs the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12672 
// n_div_4: for n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the m=2 case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12689 
// n_div_4_strided_cn: for n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the
// m=2 case with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12707 
// n_div_4_subtile: for n in {8, 12}, k in {1, 10, 19, 28, 37} and m in
// [1, 2], runs the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12727 
// small_kernel: for k in {1, 10, 19, 28, 37}, runs the m=2, n=4 case with
// ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12743 
// small_kernel_subtile: for k in {1, 10, 19, 28, 37}, every n in [1, 4] and
// every m in [1, 2], runs the kernel with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12764 
// n_gt_4_small_kernel: for every n in [5, 7] and k in {1, 10, 19, 28, 37},
// runs the m=2 case with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12782 
// n_div_4_small_kernel: for n in {8, 12} and k in {1, 10, 19, 28, 37}, runs
// the m=2 case with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12800 
// strided_cm_subtile: for k in {1, 10, 19, 28, 37}, every n in [1, 4] and
// every m in [1, 2], runs the kernel once with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12821 
// a_offset: for k in {1, 10, 19, 28, 37}, runs the m=2, n=4 case with ks(3)
// and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12838 
// zero: for k in {1, 10, 19, 28, 37} and each zero_index in [0, 1], runs the
// m=2, n=4 case with ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12858 
// qmin: single m=2, n=4, k=8 run with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12872 
// qmax: single m=2, n=4, k=8 run with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12886 
// strided_cm: single m=2, n=4, k=8 run with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12900 
// no_a_zero_point: for k in {1, 10, 19, 28, 37}, runs the m=2, n=4 case with
// a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12916 
// no_b_zero_point: for k in {1, 10, 19, 28, 37}, runs the m=2, n=4 case with
// b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12932 
// no_zero_point: for k in {1, 10, 19, 28, 37}, runs the m=2, n=4 case with
// both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12949 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12950 
12951 
12952 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m=3, n=4 (equal to mr and nr) and k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12965 
// strided_cn: single m=3, n=4, k=8 run with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12979 
// k_eq_8_subtile: with k fixed at 8, runs once for every n in [1, 4] and
// every m in [1, 3].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12997 
// k_eq_8_subtile_m: with n=4 and k=8 fixed, runs once for every m in [1, 3].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13013 
// k_eq_8_subtile_n: with m=3 and k=8 fixed, runs once for every n in [1, 4].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13029 
// k_lt_8: runs the m=3, n=4 case for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13044 
// k_lt_8_subtile: for every k in [1, 7], n in [1, 4] and m in [1, 3], runs
// the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13064 
// k_gt_8: runs the m=3, n=4 case for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13079 
// k_gt_8_subtile: for every k in [9, 15], n in [1, 4] and m in [1, 3], runs
// the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13099 
// k_div_8: runs the m=3, n=4 case for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13114 
// k_div_8_subtile: for k = 16, 24, ..., 80, every n in [1, 4] and every m in
// [1, 3], runs the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13134 
// n_gt_4: for every n in [5, 7] and k in {1, 10, 19, 28, 37}, runs the m=3
// case.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13151 
// n_gt_4_strided_cn: for every n in [5, 7] and k in {1, 10, 19, 28, 37}, runs
// the m=3 case with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13169 
// n_gt_4_subtile: for every n in [5, 7], k in {1, 10, 19, 28, 37} and m in
// [1, 3], runs the kernel with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13189 
// 3x4c2s4 SSE2 (ld64) IGEMM: m = 3 with n in {8, 12} (multiples of nr = 4),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13206 
// 3x4c2s4 SSE2 (ld64) IGEMM: m = 3 with n in {8, 12} and cn_stride(7),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13224 
// 3x4c2s4 SSE2 (ld64) IGEMM: every m = 1..3 combined with n in {8, 12} and
// k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13244 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile with ks(3),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13260 
// 3x4c2s4 SSE2 (ld64) IGEMM: every m = 1..3 by n = 1..4 with ks(3),
// for k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13281 
// 3x4c2s4 SSE2 (ld64) IGEMM: m = 3 with n = 5..7 and ks(3),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13299 
// 3x4c2s4 SSE2 (ld64) IGEMM: m = 3 with n in {8, 12} and ks(3),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13317 
// 3x4c2s4 SSE2 (ld64) IGEMM: every m = 1..3 by n = 1..4 with cm_stride(7),
// for k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13338 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile with ks(3) and a_offset(127),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13355 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile with ks(3), a_offset(127) and
// zero_index swept over 0..2, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13375 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13389 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13403 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13417 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile with a_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13433 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile with b_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13449 
// 3x4c2s4 SSE2 (ld64) IGEMM: full 3x4 tile with both a_zero_point(0) and
// b_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13466 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13467 
13468 
13469 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13482 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13496 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: every m = 1..3 by n = 1..4 at k = 8,
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13514 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: m = 1..3 with n = 4 at k = 8,
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13530 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: n = 1..4 with m = 3 at k = 8,
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13546 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile for every k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13561 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: every m = 1..3 by n = 1..4 for k = 1..7,
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13581 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile for every k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13596 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: every m = 1..3 by n = 1..4 for k = 9..15,
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13616 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile for k = 16, 24, ..., 80
// (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13631 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: every m = 1..3 by n = 1..4 for
// k = 16, 24, ..., 80, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13651 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: m = 3 with n = 5..7 (above nr = 4, below
// 2 * nr), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13668 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: m = 3 with n = 5..7 and cn_stride(7),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13686 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: every m = 1..3 combined with n = 5..7 and
// k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13706 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: m = 3 with n in {8, 12} (multiples of
// nr = 4), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13723 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: m = 3 with n in {8, 12} and cn_stride(7),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13741 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: every m = 1..3 combined with n in {8, 12}
// and k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13761 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile with ks(3),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13777 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: every m = 1..3 by n = 1..4 with ks(3),
// for k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13798 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: m = 3 with n = 5..7 and ks(3),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13816 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: m = 3 with n in {8, 12} and ks(3),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13834 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: every m = 1..3 by n = 1..4 with
// cm_stride(7), for k in {1, 10, 19, 28, 37}, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
13855 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile with ks(3) and a_offset(127),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13872 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile with ks(3), a_offset(127) and
// zero_index swept over 0..2, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
13892 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13906 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13920 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13934 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile with a_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13950 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile with b_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13966 
// 3x4c2s4 SSE4.1 (ld64) IGEMM: full 3x4 tile with both a_zero_point(0) and
// b_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
13983 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13984 
13985 
13986 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2s4 SSE2 (ld64) IGEMM: full 4x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
13999 
// 4x4c2s4 SSE2 (ld64) IGEMM: full 4x4 tile at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
14013 
// 4x4c2s4 SSE2 (ld64) IGEMM: every m = 1..4 by n = 1..4 at k = 8,
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
14031 
// 4x4c2s4 SSE2 (ld64) IGEMM: m = 1..4 with n = 4 at k = 8,
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
14047 
// Sweeps n = 1..4 with m = 4 and k = 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14063 
// Sweeps k = 1..7 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14078 
// Sweeps k = 1..7, then n = 1..4, then m = 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14098 
// Sweeps k = 9..15 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14113 
// Sweeps k = 9..15, then n = 1..4, then m = 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14133 
// Sweeps k = 16, 24, ..., 80 (multiples of 8) with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val < 88; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14148 
// Sweeps k = 16, 24, ..., 80, then n = 1..4, then m = 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val < 88; k_val += 8) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14168 
// Sweeps n = 5..7 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14185 
// Sweeps n = 5..7 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 4 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14203 
// Sweeps n = 5..7, then k = 1, 10, 19, 28, 37, then m = 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14223 
// Sweeps n = 8, 12 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14240 
// Sweeps n = 8, 12 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 4 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14258 
// Sweeps n = 8, 12, then k = 1, 10, 19, 28, 37, then m = 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14278 
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14294 
// Sweeps k = 1, 10, 19, 28, 37, then n = 1..4, then m = 1..4, with ks(3); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14315 
// Sweeps n = 5..7 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14333 
// Sweeps n = 8, 12 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14351 
// Sweeps k = 1, 10, 19, 28, 37, then n = 1..4, then m = 1..4, with cm_stride(7); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14372 
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 4, ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14389 
// Sweeps k = 1, 10, 19, 28, 37 (outer) and zero_index = 0..3 (inner) with ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t mz_val = 0; mz_val <= 3; ++mz_val) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(mz_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14409 
// Single run with m=4, n=4, k=8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
14423 
// Single run with m=4, n=4, k=8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
14437 
// Single run with m=4, n=4, k=8 and cm_stride(7) set explicitly.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
14451 
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 4 and a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14467 
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 4 and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14483 
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 4, a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14500 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14501 
14502 
14503 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m=1, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
14516 
// Single run with m=1, n=4, k=8 and cn_stride(7) set explicitly.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
14530 
// Sweeps n = 1..4 (outer) and m = 1 (inner, single value) at k = 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    for (uint32_t m_val = 1; m_val < 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14548 
// Loops over m = 1 (single value) with n = 4 and k = 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val < 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14564 
// Sweeps n = 1..4 with m = 1 and k = 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14580 
// Sweeps k = 1..7 with m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14595 
// Sweeps k = 1..7, then n = 1..4, then m = 1 (single value); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14615 
// Sweeps k = 9..15 with m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14630 
// Sweeps k = 9..15, then n = 1..4, then m = 1 (single value); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14650 
// Sweeps k = 16, 24, ..., 80 (multiples of 8) with m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val < 88; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14665 
// Sweeps k = 16, 24, ..., 80, then n = 1..4, then m = 1 (single value); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val < 88; k_val += 8) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14685 
// Sweeps n = 5..7 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14702 
// Sweeps n = 5..7 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 1 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14720 
// Sweeps n = 5..7, then k = 1, 10, 19, 28, 37, then m = 1 (single value); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14740 
// Sweeps n = 8, 12 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14757 
// Sweeps n = 8, 12 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 1 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14775 
// Sweeps n = 8, 12, then k = 1, 10, 19, 28, 37, then m = 1 (single value); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14795 
// Sweeps k = 1, 10, 19, 28, 37 with m = 1, n = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14811 
// Sweeps k = 1, 10, 19, 28, 37, then n = 1..4, then m = 1 (single value), with ks(3); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14832 
// Sweeps n = 5..7 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 1 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14850 
// Sweeps n = 8, 12 (outer) and k = 1, 10, 19, 28, 37 (inner) with m = 1 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14868 
// Sweeps k = 1, 10, 19, 28, 37, then n = 1..4, then m = 1 (single value), with cm_stride(7); one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
14889 
// Sweeps k = 1, 10, 19, 28, 37 with m = 1, n = 4, ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14906 
// 1x4c2s4 AVX LD64 kernel with ks = 3, a_offset = 43 and zero_index = 0
// (the only index below mr = 1), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t z_idx = 0; z_idx < 1; ++z_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(k_val)
          .ks(3)
          .a_offset(43)
          .zero_index(z_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
14926 
// 1x4c2s4 AVX LD64 kernel on a full 1x4 tile with k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
14940 
// 1x4c2s4 AVX LD64 kernel on a full 1x4 tile with k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
14954 
// 1x4c2s4 AVX LD64 kernel on a full 1x4 tile with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
14968 
// 1x4c2s4 AVX LD64 kernel on a full 1x4 tile with a_zero_point = 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
14984 
// 1x4c2s4 AVX LD64 kernel on a full 1x4 tile with b_zero_point = 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15000 
// 1x4c2s4 AVX LD64 kernel on a full 1x4 tile with both a_zero_point and
// b_zero_point set to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15017 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15018 
15019 
15020 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15033 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15047 
// 1x4c2s4 XOP LD64 kernel with k = 8 and one iteration per configuration,
// for n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    for (uint32_t m_val = 1; m_val < 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15065 
// 1x4c2s4 XOP LD64 kernel with k = 8, n = 4 and one iteration, looping m
// over 1..1 (a single value because mr is 1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val < 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15081 
// 1x4c2s4 XOP LD64 kernel with k = 8, m = 1 and one iteration, for n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15097 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15112 
// 1x4c2s4 XOP LD64 kernel, one iteration per configuration, for k in 1..7,
// n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15132 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15147 
// 1x4c2s4 XOP LD64 kernel, one iteration per configuration, for k in 9..15,
// n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15167 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val < 81; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15182 
// 1x4c2s4 XOP LD64 kernel, one iteration per configuration, for
// k = 16, 24, ..., 80, n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val < 81; k_val += 8) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15202 
// 1x4c2s4 XOP LD64 kernel with m = 1, for n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15219 
// 1x4c2s4 XOP LD64 kernel with m = 1 and cn_stride = 7, for n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15237 
// 1x4c2s4 XOP LD64 kernel, one iteration per configuration, for n in 5..7,
// k in {1, 10, 19, 28, 37} and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15257 
// 1x4c2s4 XOP LD64 kernel with m = 1, for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val < 16; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15274 
// 1x4c2s4 XOP LD64 kernel with m = 1 and cn_stride = 7, for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val < 16; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15292 
// 1x4c2s4 XOP LD64 kernel, one iteration per configuration, for n in {8, 12},
// k in {1, 10, 19, 28, 37} and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val < 16; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15312 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with ks = 3,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15328 
// 1x4c2s4 XOP LD64 kernel with ks = 3 and one iteration per configuration,
// for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15349 
// 1x4c2s4 XOP LD64 kernel with m = 1 and ks = 3, for n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15367 
// 1x4c2s4 XOP LD64 kernel with m = 1 and ks = 3, for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val < 16; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15385 
// 1x4c2s4 XOP LD64 kernel with cm_stride = 7 and one iteration per
// configuration, for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15406 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with ks = 3 and a_offset = 43,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15423 
// 1x4c2s4 XOP LD64 kernel with ks = 3, a_offset = 43 and zero_index = 0
// (the only index below mr = 1), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t z_idx = 0; z_idx < 1; ++z_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(k_val)
          .ks(3)
          .a_offset(43)
          .zero_index(z_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15443 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15457 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15471 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15485 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with a_zero_point = 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15501 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with b_zero_point = 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15517 
// 1x4c2s4 XOP LD64 kernel on a full 1x4 tile with both a_zero_point and
// b_zero_point set to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15534 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15535 
15536 
15537 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2s4 AVX LD64 kernel on a full 2x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15550 
// 2x4c2s4 AVX LD64 kernel on a full 2x4 tile with k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
15564 
// 2x4c2s4 AVX LD64 kernel with k = 8 and one iteration per configuration,
// for n in 1..4 and m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    for (uint32_t m_val = 1; m_val < 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15582 
// 2x4c2s4 AVX LD64 kernel with k = 8, n = 4 and one iteration, for m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val < 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15598 
// 2x4c2s4 AVX LD64 kernel with k = 8, m = 2 and one iteration, for n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15614 
// 2x4c2s4 AVX LD64 kernel on a full 2x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15629 
// 2x4c2s4 AVX LD64 kernel, one iteration per configuration, for k in 1..7,
// n in 1..4 and m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15649 
// 2x4c2s4 AVX LD64 kernel on a full 2x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15664 
// 2x4c2s4 AVX LD64 kernel, one iteration per configuration, for k in 9..15,
// n in 1..4 and m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15684 
// 2x4c2s4 AVX LD64 kernel on a full 2x4 tile for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val < 81; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
15699 
// 2x4c2s4 AVX LD64 kernel, one iteration per configuration, for
// k = 16, 24, ..., 80, n in 1..4 and m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val < 81; k_val += 8) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15719 
// 2x4c2s4 AVX LD64 kernel with m = 2, for n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15736 
// 2x4c2s4 AVX LD64 kernel with m = 2 and cn_stride = 7, for n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15754 
// 2x4c2s4 AVX LD64 kernel, one iteration per configuration, for n in 5..7,
// k in {1, 10, 19, 28, 37} and m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
15774 
// n_div_4: full m = 2 rows with n = 8 and 12 (multiples of nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15791 
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, 19, 28, 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15809 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37, and every m in 1..2; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15829 
// small_kernel: full 2x4 tile with ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15845 
// small_kernel_subtile: ks = 3 with every (m, n) in 1..2 x 1..4, for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15866 
// n_gt_4_small_kernel: ks = 3 with n = 5..7 and full m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15884 
// n_div_4_small_kernel: ks = 3 with n = 8 and 12 and full m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15902 
// strided_cm_subtile: cm_stride = 7 with every (m, n) in 1..2 x 1..4, for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15923 
// a_offset: full 2x4 tile with ks = 3 and a_offset = 83, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15940 
// zero: ks = 3, a_offset = 83, with zero_index taking each value 0..1, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_row)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15960 
// qmin: full 2x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15974 
// qmax: full 2x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15988 
// strided_cm: full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16002 
// no_a_zero_point: full 2x4 tile with a_zero_point = 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16018 
// no_b_zero_point: full 2x4 tile with b_zero_point = 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16034 
// no_zero_point: full 2x4 tile with both a_zero_point and b_zero_point = 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16051 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16052 
16053 
16054 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 2x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16067 
// strided_cn: full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16081 
// k_eq_8_subtile: k = 8 with every (m, n) in 1..2 x 1..4; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16099 
// k_eq_8_subtile_m: k = 8, n fixed at 4, m swept over 1..2; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16115 
// k_eq_8_subtile_n: k = 8, m fixed at 2, n swept over 1..4; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16131 
// k_lt_8: full 2x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16146 
// k_lt_8_subtile: every k in 1..7 with every (m, n) in 1..2 x 1..4; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16166 
// k_gt_8: full 2x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16181 
// k_gt_8_subtile: every k in 9..15 with every (m, n) in 1..2 x 1..4; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16201 
// k_div_8: full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16216 
// k_div_8_subtile: k = 16, 24, ..., 80 with every (m, n) in 1..2 x 1..4; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16236 
// n_gt_4: full m = 2 rows with n = 5..7 (just above nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16253 
// n_gt_4_strided_cn: n = 5..7, k = 1, 10, 19, 28, 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16271 
// n_gt_4_subtile: n = 5..7, k = 1, 10, 19, 28, 37, and every m in 1..2; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16291 
// n_div_4: full m = 2 rows with n = 8 and 12 (multiples of nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16308 
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, 19, 28, 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16326 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37, and every m in 1..2; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16346 
// small_kernel: full 2x4 tile with ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16362 
// small_kernel_subtile: ks = 3 with every (m, n) in 1..2 x 1..4, for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16383 
// n_gt_4_small_kernel: ks = 3 with n = 5..7 and full m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16401 
// n_div_4_small_kernel: ks = 3 with n = 8 and 12 and full m = 2, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16419 
// strided_cm_subtile: cm_stride = 7 with every (m, n) in 1..2 x 1..4, for k = 1, 10, 19, 28, 37; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16440 
// a_offset: full 2x4 tile with ks = 3 and a_offset = 83, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16457 
// zero: ks = 3, a_offset = 83, with zero_index taking each value 0..1, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_row)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16477 
// qmin: full 2x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16491 
// qmax: full 2x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16505 
// strided_cm: full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16519 
// no_a_zero_point: full 2x4 tile with a_zero_point = 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16535 
// no_b_zero_point: full 2x4 tile with b_zero_point = 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16551 
// no_zero_point: full 2x4 tile with both a_zero_point and b_zero_point = 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16568 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16569 
16570 
16571 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 4x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16584 
// strided_cn: full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
16598 
// k_eq_8_subtile: k = 8 with every (m, n) in 1..4 x 1..4; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16616 
// k_eq_8_subtile_m: k = 8, n fixed at 4, m swept over 1..4; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16632 
// k_eq_8_subtile_n: k = 8, m fixed at 4, n swept over 1..4; one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16648 
// Sweeps k over 1..7 (every value below 8) with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16663 
// Sweeps k over 1..7 and, for each k, every (n, m) pair with n in 1..4 and
// m in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16683 
// Sweeps k over 9..15 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16698 
// Sweeps k over 9..15 and, for each k, every (n, m) pair with n in 1..4 and
// m in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16718 
// Sweeps k over the multiples of 8 from 16 through 80 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16733 
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k, every
// (n, m) pair with n in 1..4 and m in 1..4, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16753 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16770 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 4 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16788 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..4,
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16808 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16825 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 4 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16843 
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..4,
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16863 
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16879 
// Sweeps k over 1, 10, 19, 28, 37 and every (n, m) pair with n in 1..4 and
// m in 1..4, with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16900 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16918 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16936 
// Sweeps k over 1, 10, 19, 28, 37 and every (n, m) pair with n in 1..4 and
// m in 1..4, with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
16957 
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 4, ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
16974 
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..3 with m = 4, n = 4,
// ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16994 
// Single run at m = 4, n = 4, k = 8 with qmin(128) applied.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
17008 
// Single run at m = 4, n = 4, k = 8 with qmax(128) applied.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
17022 
// Single run at m = 4, n = 4, k = 8 with cm_stride(7) applied.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
17036 
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 4 and a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17052 
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 4 and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17068 
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 4 and both a_zero_point(0)
// and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17085 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17086 
17087 
17088 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 4, n = 4, k = 8 with no extra tester options.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
17101 
// Single run at m = 4, n = 4, k = 8 with cn_stride(7) applied.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
17115 
// Sweeps every (n, m) pair with n in 1..4 and m in 1..4 at k = 8,
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17133 
// Sweeps m over 1..4 with n fixed at 4 and k = 8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17149 
// Sweeps n over 1..4 with m fixed at 4 and k = 8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17165 
// Sweeps k over 1..7 (every value below 8) with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17180 
// Sweeps k over 1..7 and, for each k, every (n, m) pair with n in 1..4 and
// m in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
17200 
// Sweeps k over 9..15 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17215 
// Sweeps k over 9..15 and, for each k, every (n, m) pair with n in 1..4 and
// m in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
17235 
// Sweeps k over the multiples of 8 from 16 through 80 with m = 4 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17250 
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k, every
// (n, m) pair with n in 1..4 and m in 1..4, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
17270 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17287 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 4 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17305 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..4,
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
17325 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17342 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 4 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17360 
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..4,
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
17380 
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17396 
// Sweeps k over 1, 10, 19, 28, 37 and every (n, m) pair with n in 1..4 and
// m in 1..4, with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
17417 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17435 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 4 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17453 
// Sweeps k over 1, 10, 19, 28, 37 and every (n, m) pair with n in 1..4 and
// m in 1..4, with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
17474 
// Sweeps k over 1, 10, 19, 28, 37 with m = 4, n = 4, ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
17491 
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..3 with m = 4, n = 4,
// ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
17511 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // m = mr, n = nr, k = 8, with qmin(128).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
17525 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // m = mr, n = nr, k = 8, with qmax(128).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
17539 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // m = mr, n = nr, k = 8, with cm_stride(7).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
17553 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // m = mr, n = nr with a_zero_point(0) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17569 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // m = mr, n = nr with b_zero_point(0) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17585 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // m = mr, n = nr with both a_zero_point(0) and b_zero_point(0) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_zero_point(0).b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17602 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17603 
17604 
17605 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single run with m = mr, n = nr and k = 8.
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
17618 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr, k = 8, with cn_stride(7).
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
17632 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 for every n in [1, 4] and m in [1, 1], one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
17650 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, n = nr, with m swept over [1, 1]; one iteration each.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17666 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, m = mr, with n swept over [1, 4]; one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17682 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr for every k in [1, 7].
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17697 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [1, 7] combined with n in [1, 4] and m in [1, 1]; one iteration each.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
17717 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr for every k in [9, 15].
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17732 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [9, 15] combined with n in [1, 4] and m in [1, 1]; one iteration each.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
17752 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17767 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80 combined with n in [1, 4] and m in [1, 1]; one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
17787 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7] (greater than nr) with m = mr, for k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
17804 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7] with m = mr and cn_stride(7), for k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
17822 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7], k = 1, 10, 19, 28, 37, and m in [1, 1]; one iteration each.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
17842 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 (multiples of nr) with m = mr, for k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
17859 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 with m = mr and cn_stride(7), for k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
17877 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12, k = 1, 10, 19, 28, 37, and m in [1, 1]; one iteration each.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
17897 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr with ks(3), for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
17913 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // ks(3) for k = 1, 10, 19, 28, 37 with n in [1, 4] and m in [1, 1]; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3).iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
17934 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7] with m = mr and ks(3), for k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
17952 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 with m = mr and ks(3), for k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
17970 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // cm_stride(7) for k = 1, 10, 19, 28, 37 with n in [1, 4] and m in [1, 1]; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7).iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
17991 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr, ks(3) and a_offset(43) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3).a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18008 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  // ks(3) and a_offset(43) with zero_index swept over [0, 0], for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(kc)
          .ks(3).a_offset(43).zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18028 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr, k = 8, with qmin(128).
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18042 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr, k = 8, with qmax(128).
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18056 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr, k = 8, with cm_stride(7).
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18070 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr with a_zero_point(0) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18086 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr with b_zero_point(0) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18102 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr with both a_zero_point(0) and b_zero_point(0) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_zero_point(0).b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18119 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18120 
18121 
18122 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single run with m = mr, n = nr and k = 8.
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18135 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr, k = 8, with cn_stride(7).
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
18149 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18167 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, n = nr, with m swept over [1, 4]; one iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18183 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, m = mr, with n swept over [1, 4]; one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18199 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr for every k in [1, 7].
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18214 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [1, 7] combined with every (m, n) in [1, 4] x [1, 4]; one iteration each.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18234 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr for every k in [9, 15].
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18249 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [9, 15] combined with every (m, n) in [1, 4] x [1, 4]; one iteration each.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18269 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = mr, n = nr for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
18284 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80 combined with every (m, n) in [1, 4] x [1, 4]; one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18304 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7] (greater than nr) with m = mr, for k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18321 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7] with m = mr and cn_stride(7), for k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
18339 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7], k = 1, 10, 19, 28, 37, and m in [1, 4]; one iteration each.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
18359 
// n in {8, 12} (multiples of nr = 4) with m = 4, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18376 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18394 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, and every m in 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18414 
// m = 4, n = 4 with ks set to 3, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18430 
// ks = 3 for every (m, n) in 1..4 x 1..4 and k in {1, 10, 19, 28, 37}; iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18451 
// n = 5..7 with ks set to 3 and m = 4, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18469 
// n in {8, 12} with ks set to 3 and m = 4, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18487 
// cm_stride = 7 for every (m, n) in 1..4 x 1..4 and k in {1, 10, 19, 28, 37}; iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18508 
// m = 4, n = 4, ks = 3 with a_offset set to 163, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18525 
// ks = 3 and a_offset = 163 with zero_index taking each value 0..3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(4).k(k_dim);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18545 
// Single m = 4, n = 4, k = 8 case with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18559 
// Single m = 4, n = 4, k = 8 case with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18573 
// Single m = 4, n = 4, k = 8 case with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18587 
// m = 4, n = 4 with a_zero_point set to 0, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18603 
// m = 4, n = 4 with b_zero_point set to 0, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18619 
// m = 4, n = 4 with both a_zero_point and b_zero_point set to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18636 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18637 
18638 
18639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile case: m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18652 
// Single m = 4, n = 4, k = 8 case with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18666 
// k = 8 for every (m, n) in 1..4 x 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18684 
// k = 8, n = 4, with m taking each value 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_dim).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18700 
// k = 8, m = 4, with n taking each value 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18716 
// m = 4, n = 4 with k taking each value 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18731 
// k = 1..7 for every (m, n) in 1..4 x 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18751 
// m = 4, n = 4 with k taking each value 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18766 
// k = 9..15 for every (m, n) in 1..4 x 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18786 
// m = 4, n = 4 with k stepping through the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18801 
// k in {16, 24, ..., 80} for every (m, n) in 1..4 x 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18821 
// n = 5..7 (each above nr = 4) with m = 4, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18838 
// n = 5..7, k in {1, 10, 19, 28, 37}, m = 4, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18856 
// n = 5..7, k in {1, 10, 19, 28, 37}, and every m in 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18876 
// n in {8, 12} (multiples of nr = 4) with m = 4, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18893 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 4, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18911 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, and every m in 1..4; iterations is set to 1 per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18931 
// m = 4, n = 4 with ks set to 3, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18947 
// ks = 3 for every (m, n) in 1..4 x 1..4 and k in {1, 10, 19, 28, 37}; iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18968 
// n = 5..7 with ks set to 3 and m = 4, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18986 
// n in {8, 12} with ks set to 3 and m = 4, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19004 
// cm_stride = 7 for every (m, n) in 1..4 x 1..4 and k in {1, 10, 19, 28, 37}; iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
19025 
// m = 4, n = 4, ks = 3 with a_offset set to 163, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19042 
// ks = 3 and a_offset = 163 with zero_index taking each value 0..3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(4).k(k_dim);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19062 
// Single m = 4, n = 4, k = 8 case with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19076 
// Single m = 4, n = 4, k = 8 case with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19090 
// Single m = 4, n = 4, k = 8 case with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19104 
// m = 4, n = 4 with a_zero_point set to 0, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19120 
// m = 4, n = 4 with b_zero_point set to 0, swept over k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19136 
// m = 4, n = 4 with both a_zero_point and b_zero_point set to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_dim);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19153 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19154 
19155 
19156 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile case: m = 1, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19169 
// Single m = 1, n = 4, k = 8 case with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
19183 
// k = 8 with n = 1..4; the m loop covers only m = 1 because its bound is 1. iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
19201 
// k = 8, n = 4; the m loop covers only m = 1 because its bound is 1. iterations is set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(m_dim).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
19217 
// k_eq_8_subtile_n: m=1 and k=8 fixed; sweeps n over [1, 4] with iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,k_eq_8_subtile_n)19218   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
19219     TEST_REQUIRES_X86_XOP;
19220     for (uint32_t n = 1; n <= 4; n++) {
19221       GemmMicrokernelTester()
19222         .mr(1)
19223         .nr(4)
19224         .kr(2)
19225         .sr(4)
19226         .m(1)
19227         .n(n)
19228         .k(8)
19229         .iterations(1)
19230         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19231     }
19232   }
19233 
// k_lt_8: m=1, n=4 fixed; sweeps k over every value from 1 to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,k_lt_8)19234   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8) {
19235     TEST_REQUIRES_X86_XOP;
19236     for (size_t k = 1; k < 8; k++) {
19237       GemmMicrokernelTester()
19238         .mr(1)
19239         .nr(4)
19240         .kr(2)
19241         .sr(4)
19242         .m(1)
19243         .n(4)
19244         .k(k)
19245         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19246     }
19247   }
19248 
// k_lt_8_subtile: sweeps k over 1..7, n over [1, 4] and m over [1, 1],
// with iterations(1) for each (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,k_lt_8_subtile)19249   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8_subtile) {
19250     TEST_REQUIRES_X86_XOP;
19251     for (size_t k = 1; k < 8; k++) {
19252       for (uint32_t n = 1; n <= 4; n++) {
19253         for (uint32_t m = 1; m <= 1; m++) {
19254           GemmMicrokernelTester()
19255             .mr(1)
19256             .nr(4)
19257             .kr(2)
19258             .sr(4)
19259             .m(m)
19260             .n(n)
19261             .k(k)
19262             .iterations(1)
19263             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19264         }
19265       }
19266     }
19267   }
19268 
// k_gt_8: m=1, n=4 fixed; sweeps k over every value from 9 to 15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,k_gt_8)19269   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8) {
19270     TEST_REQUIRES_X86_XOP;
19271     for (size_t k = 9; k < 16; k++) {
19272       GemmMicrokernelTester()
19273         .mr(1)
19274         .nr(4)
19275         .kr(2)
19276         .sr(4)
19277         .m(1)
19278         .n(4)
19279         .k(k)
19280         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19281     }
19282   }
19283 
// k_gt_8_subtile: sweeps k over 9..15, n over [1, 4] and m over [1, 1],
// with iterations(1) for each (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,k_gt_8_subtile)19284   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8_subtile) {
19285     TEST_REQUIRES_X86_XOP;
19286     for (size_t k = 9; k < 16; k++) {
19287       for (uint32_t n = 1; n <= 4; n++) {
19288         for (uint32_t m = 1; m <= 1; m++) {
19289           GemmMicrokernelTester()
19290             .mr(1)
19291             .nr(4)
19292             .kr(2)
19293             .sr(4)
19294             .m(m)
19295             .n(n)
19296             .k(k)
19297             .iterations(1)
19298             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19299         }
19300       }
19301     }
19302   }
19303 
// k_div_8: m=1, n=4 fixed; sweeps k over the multiples of 8 from 16 to 80 inclusive.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,k_div_8)19304   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8) {
19305     TEST_REQUIRES_X86_XOP;
19306     for (size_t k = 16; k <= 80; k += 8) {
19307       GemmMicrokernelTester()
19308         .mr(1)
19309         .nr(4)
19310         .kr(2)
19311         .sr(4)
19312         .m(1)
19313         .n(4)
19314         .k(k)
19315         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19316     }
19317   }
19318 
// k_div_8_subtile: sweeps k over the multiples of 8 from 16 to 80, n over [1, 4] and
// m over [1, 1], with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,k_div_8_subtile)19319   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8_subtile) {
19320     TEST_REQUIRES_X86_XOP;
19321     for (size_t k = 16; k <= 80; k += 8) {
19322       for (uint32_t n = 1; n <= 4; n++) {
19323         for (uint32_t m = 1; m <= 1; m++) {
19324           GemmMicrokernelTester()
19325             .mr(1)
19326             .nr(4)
19327             .kr(2)
19328             .sr(4)
19329             .m(m)
19330             .n(n)
19331             .k(k)
19332             .iterations(1)
19333             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19334         }
19335       }
19336     }
19337   }
19338 
// n_gt_4: m=1 fixed; sweeps n over 5..7 (above nr=4) and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,n_gt_4)19339   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4) {
19340     TEST_REQUIRES_X86_XOP;
19341     for (uint32_t n = 5; n < 8; n++) {
19342       for (size_t k = 1; k <= 40; k += 9) {
19343         GemmMicrokernelTester()
19344           .mr(1)
19345           .nr(4)
19346           .kr(2)
19347           .sr(4)
19348           .m(1)
19349           .n(n)
19350           .k(k)
19351           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19352       }
19353     }
19354   }
19355 
// n_gt_4_strided_cn: m=1 fixed; sweeps n over 5..7 and k over 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,n_gt_4_strided_cn)19356   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
19357     TEST_REQUIRES_X86_XOP;
19358     for (uint32_t n = 5; n < 8; n++) {
19359       for (size_t k = 1; k <= 40; k += 9) {
19360         GemmMicrokernelTester()
19361           .mr(1)
19362           .nr(4)
19363           .kr(2)
19364           .sr(4)
19365           .m(1)
19366           .n(n)
19367           .k(k)
19368           .cn_stride(7)
19369           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19370       }
19371     }
19372   }
19373 
// n_gt_4_subtile: sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over [1, 1],
// with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,n_gt_4_subtile)19374   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_subtile) {
19375     TEST_REQUIRES_X86_XOP;
19376     for (uint32_t n = 5; n < 8; n++) {
19377       for (size_t k = 1; k <= 40; k += 9) {
19378         for (uint32_t m = 1; m <= 1; m++) {
19379           GemmMicrokernelTester()
19380             .mr(1)
19381             .nr(4)
19382             .kr(2)
19383             .sr(4)
19384             .m(m)
19385             .n(n)
19386             .k(k)
19387             .iterations(1)
19388             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19389         }
19390       }
19391     }
19392   }
19393 
// n_div_4: m=1 fixed; runs n = 8 and n = 12 (multiples of nr=4), each with
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,n_div_4)19394   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4) {
19395     TEST_REQUIRES_X86_XOP;
19396     for (uint32_t n = 8; n <= 12; n += 4) {
19397       for (size_t k = 1; k <= 40; k += 9) {
19398         GemmMicrokernelTester()
19399           .mr(1)
19400           .nr(4)
19401           .kr(2)
19402           .sr(4)
19403           .m(1)
19404           .n(n)
19405           .k(k)
19406           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19407       }
19408     }
19409   }
19410 
// n_div_4_strided_cn: m=1 fixed; runs n = 8 and n = 12 with k over 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,n_div_4_strided_cn)19411   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_strided_cn) {
19412     TEST_REQUIRES_X86_XOP;
19413     for (uint32_t n = 8; n <= 12; n += 4) {
19414       for (size_t k = 1; k <= 40; k += 9) {
19415         GemmMicrokernelTester()
19416           .mr(1)
19417           .nr(4)
19418           .kr(2)
19419           .sr(4)
19420           .m(1)
19421           .n(n)
19422           .k(k)
19423           .cn_stride(7)
19424           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19425       }
19426     }
19427   }
19428 
// n_div_4_subtile: runs n = 8 and n = 12, k over 1, 10, 19, 28, 37 and m over [1, 1],
// with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,n_div_4_subtile)19429   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_subtile) {
19430     TEST_REQUIRES_X86_XOP;
19431     for (uint32_t n = 8; n <= 12; n += 4) {
19432       for (size_t k = 1; k <= 40; k += 9) {
19433         for (uint32_t m = 1; m <= 1; m++) {
19434           GemmMicrokernelTester()
19435             .mr(1)
19436             .nr(4)
19437             .kr(2)
19438             .sr(4)
19439             .m(m)
19440             .n(n)
19441             .k(k)
19442             .iterations(1)
19443             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19444         }
19445       }
19446     }
19447   }
19448 
// small_kernel: m=1, n=4 fixed with ks set to 3; sweeps k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,small_kernel)19449   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, small_kernel) {
19450     TEST_REQUIRES_X86_XOP;
19451     for (size_t k = 1; k <= 40; k += 9) {
19452       GemmMicrokernelTester()
19453         .mr(1)
19454         .nr(4)
19455         .kr(2)
19456         .sr(4)
19457         .m(1)
19458         .n(4)
19459         .k(k)
19460         .ks(3)
19461         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19462     }
19463   }
19464 
// small_kernel_subtile: ks set to 3; sweeps k over 1, 10, 19, 28, 37, n over [1, 4]
// and m over [1, 1], with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,small_kernel_subtile)19465   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, small_kernel_subtile) {
19466     TEST_REQUIRES_X86_XOP;
19467     for (size_t k = 1; k <= 40; k += 9) {
19468       for (uint32_t n = 1; n <= 4; n++) {
19469         for (uint32_t m = 1; m <= 1; m++) {
19470           GemmMicrokernelTester()
19471             .mr(1)
19472             .nr(4)
19473             .kr(2)
19474             .sr(4)
19475             .m(m)
19476             .n(n)
19477             .k(k)
19478             .ks(3)
19479             .iterations(1)
19480             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19481         }
19482       }
19483     }
19484   }
19485 
// n_gt_4_small_kernel: m=1 and ks=3 fixed; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,n_gt_4_small_kernel)19486   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
19487     TEST_REQUIRES_X86_XOP;
19488     for (uint32_t n = 5; n < 8; n++) {
19489       for (size_t k = 1; k <= 40; k += 9) {
19490         GemmMicrokernelTester()
19491           .mr(1)
19492           .nr(4)
19493           .kr(2)
19494           .sr(4)
19495           .m(1)
19496           .n(n)
19497           .k(k)
19498           .ks(3)
19499           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19500       }
19501     }
19502   }
19503 
// n_div_4_small_kernel: m=1 and ks=3 fixed; runs n = 8 and n = 12 with
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,n_div_4_small_kernel)19504   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_small_kernel) {
19505     TEST_REQUIRES_X86_XOP;
19506     for (uint32_t n = 8; n <= 12; n += 4) {
19507       for (size_t k = 1; k <= 40; k += 9) {
19508         GemmMicrokernelTester()
19509           .mr(1)
19510           .nr(4)
19511           .kr(2)
19512           .sr(4)
19513           .m(1)
19514           .n(n)
19515           .k(k)
19516           .ks(3)
19517           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19518       }
19519     }
19520   }
19521 
// strided_cm_subtile: cm_stride set to 7; sweeps k over 1, 10, 19, 28, 37, n over [1, 4]
// and m over [1, 1], with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,strided_cm_subtile)19522   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm_subtile) {
19523     TEST_REQUIRES_X86_XOP;
19524     for (size_t k = 1; k <= 40; k += 9) {
19525       for (uint32_t n = 1; n <= 4; n++) {
19526         for (uint32_t m = 1; m <= 1; m++) {
19527           GemmMicrokernelTester()
19528             .mr(1)
19529             .nr(4)
19530             .kr(2)
19531             .sr(4)
19532             .m(m)
19533             .n(n)
19534             .k(k)
19535             .cm_stride(7)
19536             .iterations(1)
19537             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19538         }
19539       }
19540     }
19541   }
19542 
// a_offset: m=1, n=4, ks=3 fixed with a_offset set to 43; sweeps k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,a_offset)19543   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, a_offset) {
19544     TEST_REQUIRES_X86_XOP;
19545     for (size_t k = 1; k <= 40; k += 9) {
19546       GemmMicrokernelTester()
19547         .mr(1)
19548         .nr(4)
19549         .kr(2)
19550         .sr(4)
19551         .m(1)
19552         .n(4)
19553         .k(k)
19554         .ks(3)
19555         .a_offset(43)
19556         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19557     }
19558   }
19559 
// zero: m=1, n=4, ks=3, a_offset=43 fixed; sweeps k over 1, 10, 19, 28, 37 and passes each
// mz in [0, 1) (only 0 here, since the bound equals mr=1) to zero_index.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,zero)19560   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, zero) {
19561     TEST_REQUIRES_X86_XOP;
19562     for (size_t k = 1; k <= 40; k += 9) {
19563       for (uint32_t mz = 0; mz < 1; mz++) {
19564         GemmMicrokernelTester()
19565           .mr(1)
19566           .nr(4)
19567           .kr(2)
19568           .sr(4)
19569           .m(1)
19570           .n(4)
19571           .k(k)
19572           .ks(3)
19573           .a_offset(43)
19574           .zero_index(mz)
19575           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19576       }
19577     }
19578   }
19579 
// qmin: single m=1, n=4, k=8 run with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,qmin)19580   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmin) {
19581     TEST_REQUIRES_X86_XOP;
19582     GemmMicrokernelTester()
19583       .mr(1)
19584       .nr(4)
19585       .kr(2)
19586       .sr(4)
19587       .m(1)
19588       .n(4)
19589       .k(8)
19590       .qmin(128)
19591       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19592   }
19593 
// qmax: single m=1, n=4, k=8 run with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,qmax)19594   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmax) {
19595     TEST_REQUIRES_X86_XOP;
19596     GemmMicrokernelTester()
19597       .mr(1)
19598       .nr(4)
19599       .kr(2)
19600       .sr(4)
19601       .m(1)
19602       .n(4)
19603       .k(8)
19604       .qmax(128)
19605       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19606   }
19607 
// strided_cm: single m=1, n=4, k=8 run with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,strided_cm)19608   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm) {
19609     TEST_REQUIRES_X86_XOP;
19610     GemmMicrokernelTester()
19611       .mr(1)
19612       .nr(4)
19613       .kr(2)
19614       .sr(4)
19615       .m(1)
19616       .n(4)
19617       .k(8)
19618       .cm_stride(7)
19619       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19620   }
19621 
// no_a_zero_point: m=1, n=4 fixed with a_zero_point set to 0; sweeps k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,no_a_zero_point)19622   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_a_zero_point) {
19623     TEST_REQUIRES_X86_XOP;
19624     for (size_t k = 1; k <= 40; k += 9) {
19625       GemmMicrokernelTester()
19626         .mr(1)
19627         .nr(4)
19628         .kr(2)
19629         .sr(4)
19630         .m(1)
19631         .n(4)
19632         .k(k)
19633         .a_zero_point(0)
19634         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19635     }
19636   }
19637 
// no_b_zero_point: m=1, n=4 fixed with b_zero_point set to 0; sweeps k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,no_b_zero_point)19638   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_b_zero_point) {
19639     TEST_REQUIRES_X86_XOP;
19640     for (size_t k = 1; k <= 40; k += 9) {
19641       GemmMicrokernelTester()
19642         .mr(1)
19643         .nr(4)
19644         .kr(2)
19645         .sr(4)
19646         .m(1)
19647         .n(4)
19648         .k(k)
19649         .b_zero_point(0)
19650         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19651     }
19652   }
19653 
// no_zero_point: m=1, n=4 fixed with both a_zero_point and b_zero_point set to 0;
// sweeps k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128,no_zero_point)19654   TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, no_zero_point) {
19655     TEST_REQUIRES_X86_XOP;
19656     for (size_t k = 1; k <= 40; k += 9) {
19657       GemmMicrokernelTester()
19658         .mr(1)
19659         .nr(4)
19660         .kr(2)
19661         .sr(4)
19662         .m(1)
19663         .n(4)
19664         .k(k)
19665         .a_zero_point(0)
19666         .b_zero_point(0)
19667         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19668     }
19669   }
19670 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19671 
19672 
19673 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: one GemmMicrokernelTester run of the 2x4c2s4 XOP LD128 QU8 IGEMM ukernel with
// m=2, n=4 (equal to the configured mr/nr) and k=8.
// NOTE(review): TEST_REQUIRES_X86_XOP presumably skips the test on CPUs without XOP — confirm in isa-checks.h.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_eq_8)19674   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8) {
19675     TEST_REQUIRES_X86_XOP;
19676     GemmMicrokernelTester()
19677       .mr(2)
19678       .nr(4)
19679       .kr(2)
19680       .sr(4)
19681       .m(2)
19682       .n(4)
19683       .k(8)
19684       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19685   }
19686 
// strided_cn: m=2, n=4, k=8 run with cn_stride set to 7, a value larger than nr=4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,strided_cn)19687   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cn) {
19688     TEST_REQUIRES_X86_XOP;
19689     GemmMicrokernelTester()
19690       .mr(2)
19691       .nr(4)
19692       .kr(2)
19693       .sr(4)
19694       .m(2)
19695       .n(4)
19696       .k(8)
19697       .cn_stride(7)
19698       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19699   }
19700 
// k_eq_8_subtile: k fixed at 8; sweeps n over [1, 4] and m over [1, 2],
// running the tester with iterations(1) for each (m, n) pair.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_eq_8_subtile)19701   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile) {
19702     TEST_REQUIRES_X86_XOP;
19703     for (uint32_t n = 1; n <= 4; n++) {
19704       for (uint32_t m = 1; m <= 2; m++) {
19705         GemmMicrokernelTester()
19706           .mr(2)
19707           .nr(4)
19708           .kr(2)
19709           .sr(4)
19710           .m(m)
19711           .n(n)
19712           .k(8)
19713           .iterations(1)
19714           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19715       }
19716     }
19717   }
19718 
// k_eq_8_subtile_m: n=4 and k=8 fixed; sweeps m over [1, 2] with iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_eq_8_subtile_m)19719   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
19720     TEST_REQUIRES_X86_XOP;
19721     for (uint32_t m = 1; m <= 2; m++) {
19722       GemmMicrokernelTester()
19723         .mr(2)
19724         .nr(4)
19725         .kr(2)
19726         .sr(4)
19727         .m(m)
19728         .n(4)
19729         .k(8)
19730         .iterations(1)
19731         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19732     }
19733   }
19734 
// k_eq_8_subtile_n: m=2 and k=8 fixed; sweeps n over [1, 4] with iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_eq_8_subtile_n)19735   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
19736     TEST_REQUIRES_X86_XOP;
19737     for (uint32_t n = 1; n <= 4; n++) {
19738       GemmMicrokernelTester()
19739         .mr(2)
19740         .nr(4)
19741         .kr(2)
19742         .sr(4)
19743         .m(2)
19744         .n(n)
19745         .k(8)
19746         .iterations(1)
19747         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19748     }
19749   }
19750 
// k_lt_8: m=2, n=4 fixed; sweeps k over every value from 1 to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_lt_8)19751   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8) {
19752     TEST_REQUIRES_X86_XOP;
19753     for (size_t k = 1; k < 8; k++) {
19754       GemmMicrokernelTester()
19755         .mr(2)
19756         .nr(4)
19757         .kr(2)
19758         .sr(4)
19759         .m(2)
19760         .n(4)
19761         .k(k)
19762         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19763     }
19764   }
19765 
// k_lt_8_subtile: sweeps k over 1..7, n over [1, 4] and m over [1, 2],
// with iterations(1) for each (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_lt_8_subtile)19766   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_subtile) {
19767     TEST_REQUIRES_X86_XOP;
19768     for (size_t k = 1; k < 8; k++) {
19769       for (uint32_t n = 1; n <= 4; n++) {
19770         for (uint32_t m = 1; m <= 2; m++) {
19771           GemmMicrokernelTester()
19772             .mr(2)
19773             .nr(4)
19774             .kr(2)
19775             .sr(4)
19776             .m(m)
19777             .n(n)
19778             .k(k)
19779             .iterations(1)
19780             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19781         }
19782       }
19783     }
19784   }
19785 
// k_gt_8: m=2, n=4 fixed; sweeps k over every value from 9 to 15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_gt_8)19786   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8) {
19787     TEST_REQUIRES_X86_XOP;
19788     for (size_t k = 9; k < 16; k++) {
19789       GemmMicrokernelTester()
19790         .mr(2)
19791         .nr(4)
19792         .kr(2)
19793         .sr(4)
19794         .m(2)
19795         .n(4)
19796         .k(k)
19797         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19798     }
19799   }
19800 
// k_gt_8_subtile: sweeps k over 9..15, n over [1, 4] and m over [1, 2],
// with iterations(1) for each (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_gt_8_subtile)19801   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_subtile) {
19802     TEST_REQUIRES_X86_XOP;
19803     for (size_t k = 9; k < 16; k++) {
19804       for (uint32_t n = 1; n <= 4; n++) {
19805         for (uint32_t m = 1; m <= 2; m++) {
19806           GemmMicrokernelTester()
19807             .mr(2)
19808             .nr(4)
19809             .kr(2)
19810             .sr(4)
19811             .m(m)
19812             .n(n)
19813             .k(k)
19814             .iterations(1)
19815             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19816         }
19817       }
19818     }
19819   }
19820 
// k_div_8: m=2, n=4 fixed; sweeps k over the multiples of 8 from 16 to 80 inclusive.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_div_8)19821   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8) {
19822     TEST_REQUIRES_X86_XOP;
19823     for (size_t k = 16; k <= 80; k += 8) {
19824       GemmMicrokernelTester()
19825         .mr(2)
19826         .nr(4)
19827         .kr(2)
19828         .sr(4)
19829         .m(2)
19830         .n(4)
19831         .k(k)
19832         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19833     }
19834   }
19835 
// k_div_8_subtile: sweeps k over the multiples of 8 from 16 to 80, n over [1, 4] and
// m over [1, 2], with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,k_div_8_subtile)19836   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_subtile) {
19837     TEST_REQUIRES_X86_XOP;
19838     for (size_t k = 16; k <= 80; k += 8) {
19839       for (uint32_t n = 1; n <= 4; n++) {
19840         for (uint32_t m = 1; m <= 2; m++) {
19841           GemmMicrokernelTester()
19842             .mr(2)
19843             .nr(4)
19844             .kr(2)
19845             .sr(4)
19846             .m(m)
19847             .n(n)
19848             .k(k)
19849             .iterations(1)
19850             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19851         }
19852       }
19853     }
19854   }
19855 
// n_gt_4: m=2 fixed; sweeps n over 5..7 (above nr=4) and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,n_gt_4)19856   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4) {
19857     TEST_REQUIRES_X86_XOP;
19858     for (uint32_t n = 5; n < 8; n++) {
19859       for (size_t k = 1; k <= 40; k += 9) {
19860         GemmMicrokernelTester()
19861           .mr(2)
19862           .nr(4)
19863           .kr(2)
19864           .sr(4)
19865           .m(2)
19866           .n(n)
19867           .k(k)
19868           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19869       }
19870     }
19871   }
19872 
// n_gt_4_strided_cn: m=2 fixed; sweeps n over 5..7 and k over 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,n_gt_4_strided_cn)19873   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
19874     TEST_REQUIRES_X86_XOP;
19875     for (uint32_t n = 5; n < 8; n++) {
19876       for (size_t k = 1; k <= 40; k += 9) {
19877         GemmMicrokernelTester()
19878           .mr(2)
19879           .nr(4)
19880           .kr(2)
19881           .sr(4)
19882           .m(2)
19883           .n(n)
19884           .k(k)
19885           .cn_stride(7)
19886           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19887       }
19888     }
19889   }
19890 
// n_gt_4_subtile: sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over [1, 2],
// with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,n_gt_4_subtile)19891   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_subtile) {
19892     TEST_REQUIRES_X86_XOP;
19893     for (uint32_t n = 5; n < 8; n++) {
19894       for (size_t k = 1; k <= 40; k += 9) {
19895         for (uint32_t m = 1; m <= 2; m++) {
19896           GemmMicrokernelTester()
19897             .mr(2)
19898             .nr(4)
19899             .kr(2)
19900             .sr(4)
19901             .m(m)
19902             .n(n)
19903             .k(k)
19904             .iterations(1)
19905             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19906         }
19907       }
19908     }
19909   }
19910 
// n_div_4: m=2 fixed; runs n = 8 and n = 12 (multiples of nr=4), each with
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,n_div_4)19911   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4) {
19912     TEST_REQUIRES_X86_XOP;
19913     for (uint32_t n = 8; n <= 12; n += 4) {
19914       for (size_t k = 1; k <= 40; k += 9) {
19915         GemmMicrokernelTester()
19916           .mr(2)
19917           .nr(4)
19918           .kr(2)
19919           .sr(4)
19920           .m(2)
19921           .n(n)
19922           .k(k)
19923           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19924       }
19925     }
19926   }
19927 
// n_div_4_strided_cn: m=2 fixed; runs n = 8 and n = 12 with k over 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,n_div_4_strided_cn)19928   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_cn) {
19929     TEST_REQUIRES_X86_XOP;
19930     for (uint32_t n = 8; n <= 12; n += 4) {
19931       for (size_t k = 1; k <= 40; k += 9) {
19932         GemmMicrokernelTester()
19933           .mr(2)
19934           .nr(4)
19935           .kr(2)
19936           .sr(4)
19937           .m(2)
19938           .n(n)
19939           .k(k)
19940           .cn_stride(7)
19941           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19942       }
19943     }
19944   }
19945 
// n_div_4_subtile: runs n = 8 and n = 12, k over 1, 10, 19, 28, 37 and m over [1, 2],
// with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,n_div_4_subtile)19946   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_subtile) {
19947     TEST_REQUIRES_X86_XOP;
19948     for (uint32_t n = 8; n <= 12; n += 4) {
19949       for (size_t k = 1; k <= 40; k += 9) {
19950         for (uint32_t m = 1; m <= 2; m++) {
19951           GemmMicrokernelTester()
19952             .mr(2)
19953             .nr(4)
19954             .kr(2)
19955             .sr(4)
19956             .m(m)
19957             .n(n)
19958             .k(k)
19959             .iterations(1)
19960             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19961         }
19962       }
19963     }
19964   }
19965 
// small_kernel: m=2, n=4 fixed with ks set to 3; sweeps k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,small_kernel)19966   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel) {
19967     TEST_REQUIRES_X86_XOP;
19968     for (size_t k = 1; k <= 40; k += 9) {
19969       GemmMicrokernelTester()
19970         .mr(2)
19971         .nr(4)
19972         .kr(2)
19973         .sr(4)
19974         .m(2)
19975         .n(4)
19976         .k(k)
19977         .ks(3)
19978         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19979     }
19980   }
19981 
// small_kernel_subtile: ks set to 3; sweeps k over 1, 10, 19, 28, 37, n over [1, 4]
// and m over [1, 2], with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,small_kernel_subtile)19982   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel_subtile) {
19983     TEST_REQUIRES_X86_XOP;
19984     for (size_t k = 1; k <= 40; k += 9) {
19985       for (uint32_t n = 1; n <= 4; n++) {
19986         for (uint32_t m = 1; m <= 2; m++) {
19987           GemmMicrokernelTester()
19988             .mr(2)
19989             .nr(4)
19990             .kr(2)
19991             .sr(4)
19992             .m(m)
19993             .n(n)
19994             .k(k)
19995             .ks(3)
19996             .iterations(1)
19997             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
19998         }
19999       }
20000     }
20001   }
20002 
// n_gt_4_small_kernel: m=2 and ks=3 fixed; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,n_gt_4_small_kernel)20003   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
20004     TEST_REQUIRES_X86_XOP;
20005     for (uint32_t n = 5; n < 8; n++) {
20006       for (size_t k = 1; k <= 40; k += 9) {
20007         GemmMicrokernelTester()
20008           .mr(2)
20009           .nr(4)
20010           .kr(2)
20011           .sr(4)
20012           .m(2)
20013           .n(n)
20014           .k(k)
20015           .ks(3)
20016           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
20017       }
20018     }
20019   }
20020 
// n_div_4_small_kernel: m=2 and ks=3 fixed; runs n = 8 and n = 12 with
// k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,n_div_4_small_kernel)20021   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_small_kernel) {
20022     TEST_REQUIRES_X86_XOP;
20023     for (uint32_t n = 8; n <= 12; n += 4) {
20024       for (size_t k = 1; k <= 40; k += 9) {
20025         GemmMicrokernelTester()
20026           .mr(2)
20027           .nr(4)
20028           .kr(2)
20029           .sr(4)
20030           .m(2)
20031           .n(n)
20032           .k(k)
20033           .ks(3)
20034           .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
20035       }
20036     }
20037   }
20038 
// strided_cm_subtile: cm_stride set to 7; sweeps k over 1, 10, 19, 28, 37, n over [1, 4]
// and m over [1, 2], with iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,strided_cm_subtile)20039   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm_subtile) {
20040     TEST_REQUIRES_X86_XOP;
20041     for (size_t k = 1; k <= 40; k += 9) {
20042       for (uint32_t n = 1; n <= 4; n++) {
20043         for (uint32_t m = 1; m <= 2; m++) {
20044           GemmMicrokernelTester()
20045             .mr(2)
20046             .nr(4)
20047             .kr(2)
20048             .sr(4)
20049             .m(m)
20050             .n(n)
20051             .k(k)
20052             .cm_stride(7)
20053             .iterations(1)
20054             .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
20055         }
20056       }
20057     }
20058   }
20059 
// a_offset: m=2, n=4, ks=3 fixed with a_offset set to 83; sweeps k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128,a_offset)20060   TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, a_offset) {
20061     TEST_REQUIRES_X86_XOP;
20062     for (size_t k = 1; k <= 40; k += 9) {
20063       GemmMicrokernelTester()
20064         .mr(2)
20065         .nr(4)
20066         .kr(2)
20067         .sr(4)
20068         .m(2)
20069         .n(4)
20070         .k(k)
20071         .ks(3)
20072         .a_offset(83)
20073         .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
20074     }
20075   }
20076 
// 2x4c2s4 XOP LD128 microkernel with ks(3), a_offset(83) and zero_index(mz)
// for each mz in 0..1 (one per row of the mr = 2 tile), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20096 
// 2x4c2s4 XOP LD128 microkernel on a full 2x4 tile, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20110 
// 2x4c2s4 XOP LD128 microkernel on a full 2x4 tile, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20124 
// 2x4c2s4 XOP LD128 microkernel on a full 2x4 tile, k = 8, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20138 
// 2x4c2s4 XOP LD128 microkernel on a full 2x4 tile with a_zero_point(0),
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20154 
// 2x4c2s4 XOP LD128 microkernel on a full 2x4 tile with b_zero_point(0),
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20170 
// 2x4c2s4 XOP LD128 microkernel on a full 2x4 tile with both a_zero_point(0)
// and b_zero_point(0), k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20187 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20188 
20189 
20190 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20203 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile, k = 8, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20217 
// 3x4c2s4 AVX LD128 microkernel with k = 8 over every m in 1..3 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20235 
// 3x4c2s4 AVX LD128 microkernel with k = 8 and n = 4, varying m over 1..3,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20251 
// 3x4c2s4 AVX LD128 microkernel with k = 8 and m = 3, varying n over 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20267 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20282 
// 3x4c2s4 AVX LD128 microkernel for every k in 1..7, m in 1..3 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20302 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20317 
// 3x4c2s4 AVX LD128 microkernel for every k in 9..15, m in 1..3 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20337 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile for k = 16, 24, ..., 80
// (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20352 
// 3x4c2s4 AVX LD128 microkernel for k = 16, 24, ..., 80 over every m in 1..3
// and n in 1..4, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20372 
// 3x4c2s4 AVX LD128 microkernel with m = 3 and n in 5..7 (above nr = 4),
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20389 
// 3x4c2s4 AVX LD128 microkernel with cn_stride(7): m = 3, n in 5..7,
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20407 
// 3x4c2s4 AVX LD128 microkernel with n in 5..7, every m in 1..3,
// k = 1, 10, 19, 28, 37, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20427 
// 3x4c2s4 AVX LD128 microkernel with m = 3 and n = 8 and 12 (multiples of
// nr = 4), k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20444 
// 3x4c2s4 AVX LD128 microkernel with cn_stride(7): m = 3, n = 8 and 12,
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20462 
// 3x4c2s4 AVX LD128 microkernel with n = 8 and 12, every m in 1..3,
// k = 1, 10, 19, 28, 37, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20482 
// 3x4c2s4 AVX LD128 microkernel with ks(3) on a full 3x4 tile,
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20498 
// 3x4c2s4 AVX LD128 microkernel with ks(3) over every m in 1..3 and n in 1..4,
// k = 1, 10, 19, 28, 37, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20519 
// 3x4c2s4 AVX LD128 microkernel with ks(3): m = 3, n in 5..7,
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20537 
// 3x4c2s4 AVX LD128 microkernel with ks(3): m = 3, n = 8 and 12,
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20555 
// 3x4c2s4 AVX LD128 microkernel with cm_stride(7): every m in 1..3 and n in
// 1..4, k = 1, 10, 19, 28, 37, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20576 
// 3x4c2s4 AVX LD128 microkernel with ks(3) and a_offset(127): full 3x4 tile,
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20593 
// 3x4c2s4 AVX LD128 microkernel with ks(3), a_offset(127) and zero_index(mz)
// for each mz in 0..2 (one per row of the mr = 3 tile), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20613 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20627 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20641 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile, k = 8, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20655 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile with a_zero_point(0),
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20671 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile with b_zero_point(0),
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20687 
// 3x4c2s4 AVX LD128 microkernel on a full 3x4 tile with both a_zero_point(0)
// and b_zero_point(0), k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20704 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20705 
20706 
20707 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2s4 AVX LD128 microkernel on a full 4x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20720 
// 4x4c2s4 AVX LD128 microkernel on a full 4x4 tile, k = 8, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
20734 
// 4x4c2s4 AVX LD128 microkernel with k = 8 over every m in 1..4 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20752 
// 4x4c2s4 AVX LD128 microkernel with k = 8 and n = 4, varying m over 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20768 
// 4x4c2s4 AVX LD128 microkernel with k = 8 and m = 4, varying n over 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20784 
// 4x4c2s4 AVX LD128 microkernel on a full 4x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20799 
// 4x4c2s4 AVX LD128 microkernel for every k in 1..7, m in 1..4 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20819 
// 4x4c2s4 AVX LD128 microkernel on a full 4x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20834 
// 4x4c2s4 AVX LD128 microkernel for every k in 9..15, m in 1..4 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20854 
// 4x4c2s4 AVX LD128 microkernel on a full 4x4 tile for k = 16, 24, ..., 80
// (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
20869 
// 4x4c2s4 AVX LD128 microkernel for k = 16, 24, ..., 80 over every m in 1..4
// and n in 1..4, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20889 
// 4x4c2s4 AVX LD128 microkernel with m = 4 and n in 5..7 (above nr = 4),
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20906 
// 4x4c2s4 AVX LD128 microkernel with cn_stride(7): m = 4, n in 5..7,
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20924 
// 4x4c2s4 AVX LD128 microkernel with n in 5..7, every m in 1..4,
// k = 1, 10, 19, 28, 37, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20944 
// m = 4 with n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20961 
// n = 8 and 12 with the usual k sweep, and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
20979 
// n = 8 and 12, k = 1, 10, ..., 37, and every m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
20999 
// m = 4, n = 4 with ks set to 3, over k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21015 
// ks = 3 with every (m, n) in 1..4 x 1..4 for each k = 1, 10, ..., 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21036 
// ks = 3 with m = 4, n = 5..7, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21054 
// ks = 3 with m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21072 
// cm_stride = 7 with every (m, n) in 1..4 x 1..4 for each k = 1, 10, ..., 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21093 
// m = 4, n = 4, ks = 3 and a_offset = 163, over k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21110 
// ks = 3 and a_offset = 163, with zero_index taking each value 0..3 for every k in the sweep.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(4).k(kc);
      tester.ks(3);
      tester.a_offset(163);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21130 
// Single 4x4 run at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21144 
// Single 4x4 run at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21158 
// Single 4x4 run at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21172 
// m = 4, n = 4 with a_zero_point forced to 0, over k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21188 
// m = 4, n = 4 with b_zero_point forced to 0, over k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21204 
// m = 4, n = 4 with both a_zero_point and b_zero_point forced to 0, over the k sweep.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21221 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21222 
21223 
21224 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c8 SSE2 LD64 kernel: single run with m = 1, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21237 
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21251 
// k = 8 with n = 1..4; the m loop covers only m = 1. One iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21269 
// k = 8, n = 4, looping m over 1..1 (a single value); one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21285 
// k = 8, m = 1, looping n over 1..4; one iteration per n.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21301 
// m = 1, n = 4 for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21316 
// k = 1..7 with n = 1..4; the m loop covers only m = 1. One iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21336 
// m = 1, n = 4 for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21351 
// k = 9..15 with n = 1..4; the m loop covers only m = 1. One iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21371 
// m = 1, n = 4 for k = 16, 24, ..., 80 (steps of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21386 
// k = 16, 24, ..., 80 with n = 1..4; the m loop covers only m = 1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21406 
// m = 1 with n = 5..7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21423 
// m = 1 with n = 5..7, the usual k sweep, and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21441 
// n = 5..7 and k = 1, 10, ..., 37; the m loop covers only m = 1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21461 
// m = 1 with n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21478 
// m = 1 with n = 8 and 12, the usual k sweep, and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21496 
// n = 8 and 12 with k = 1, 10, ..., 37; the m loop covers only m = 1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21516 
// m = 1, n = 4 with ks set to 3, over k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21532 
// ks = 3 with n = 1..4 for each k = 1, 10, ..., 37; the m loop covers only m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21553 
// ks = 3 with m = 1, n = 5..7, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21571 
// ks = 3 with m = 1, n = 8 and 12, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21589 
// cm_stride = 7 with n = 1..4 for each k = 1, 10, ..., 37; the m loop covers only m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21610 
// m = 1, n = 4, ks = 3 and a_offset = 43, over k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21627 
// ks = 3 and a_offset = 43; the zero_index loop covers only the value 0 for each k in the sweep.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(kc);
      tester.ks(3);
      tester.a_offset(43);
      tester.zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21647 
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21661 
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21675 
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21689 
// m = 1, n = 4 with a_zero_point forced to 0, over k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21705 
// m = 1, n = 4 with b_zero_point forced to 0, over k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21721 
// m = 1, n = 4 with both a_zero_point and b_zero_point forced to 0, over the k sweep.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21738 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21739 
21740 
21741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c8 SSE4.1 LD64 kernel: single run with m = 1, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21754 
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
21768 
// k = 8 with n = 1..4; the m loop covers only m = 1. One iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21786 
// k = 8, n = 4, looping m over 1..1 (a single value); one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21802 
// k = 8, m = 1, sweeping n over [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21818 
// Full m = 1, n = 4 tile with k swept over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21833 
// k in [1, 7] crossed with n in [1, 4] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21853 
// Full m = 1, n = 4 tile with k swept over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21868 
// k in [9, 15] crossed with n in [1, 4] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21888 
// Full m = 1, n = 4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21903 
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21923 
// n in [5, 7] with m = 1 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21940 
// n in [5, 7], m = 1, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21958 
// n in [5, 7] crossed with k = 1, 10, ..., 37 and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21978 
// n = 8 and 12 (multiples of 4) with m = 1 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21995 
// n = 8 and 12, m = 1, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22013 
// n = 8 and 12 crossed with k = 1, 10, ..., 37 and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22033 
// Full m = 1, n = 4 tile with ks = 3 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22049 
// ks = 3 with k = 1, 10, ..., 37 crossed with n in [1, 4] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22070 
// n in [5, 7], m = 1, k = 1, 10, ..., 37, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22088 
// n = 8 and 12, m = 1, k = 1, 10, ..., 37, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22106 
// cm_stride = 7 with k = 1, 10, ..., 37 crossed with n in [1, 4] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22127 
// Full m = 1, n = 4 tile, ks = 3, a_offset = 43, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22144 
// Same setup as a_offset (ks = 3, a_offset = 43), additionally sweeping
// zero_index over [0, 0] for each k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(k_dim);
      tester.ks(3).a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22164 
// Full tile (m = 1, n = 4, k = 8) with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22178 
// Full tile (m = 1, n = 4, k = 8) with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22192 
// Full tile (m = 1, n = 4, k = 8) with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22206 
// Full m = 1, n = 4 tile with a_zero_point = 0 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22222 
// Full m = 1, n = 4 tile with b_zero_point = 0 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22238 
// Full m = 1, n = 4 tile with both a_zero_point and b_zero_point = 0,
// k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22255 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22256 
22257 
22258 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c8 SSE2 ld64 kernel on a full tile: m = 2, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22271 
// Full tile (m = 2, n = 4, k = 8) with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22285 
// k = 8 for every (n, m) with n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22303 
// k = 8, n = 4, sweeping m over [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(m_dim).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22319 
// k = 8, m = 2, sweeping n over [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22335 
// Full m = 2, n = 4 tile with k swept over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22350 
// k in [1, 7] crossed with n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22370 
// Full m = 2, n = 4 tile with k swept over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22385 
// k in [9, 15] crossed with n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22405 
// Full m = 2, n = 4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22420 
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22440 
// n in [5, 7] with m = 2 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22457 
// n in [5, 7], m = 2, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22475 
// n in [5, 7] crossed with k = 1, 10, ..., 37 and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22495 
// n = 8 and 12 (multiples of 4) with m = 2 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22512 
// n = 8 and 12, m = 2, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22530 
// n = 8 and 12 crossed with k = 1, 10, ..., 37 and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22550 
// Full m = 2, n = 4 tile with ks = 3 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22566 
// ks = 3 with k = 1, 10, ..., 37 crossed with n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22587 
// n in [5, 7], m = 2, k = 1, 10, ..., 37, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22605 
// n = 8 and 12, m = 2, k = 1, 10, ..., 37, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22623 
// cm_stride = 7 with k = 1, 10, ..., 37 crossed with n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22644 
// Full m = 2, n = 4 tile, ks = 3, a_offset = 83, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22661 
// SSE2 2x4c8 ld64 kernel: same setup as a_offset, additionally sweeping zero_index over {0, 1}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(k_val).ks(3)
          .a_offset(83).zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22681 
// SSE2 2x4c8 ld64 kernel: single 2x4 run at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22695 
// SSE2 2x4c8 ld64 kernel: single 2x4 run at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22709 
// SSE2 2x4c8 ld64 kernel: single 2x4 run at k=8 with cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22723 
// SSE2 2x4c8 ld64 kernel: full 2x4 tile, k = 1, 10, ..., 37, with a_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22739 
// SSE2 2x4c8 ld64 kernel: full 2x4 tile, k = 1, 10, ..., 37, with b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22755 
// SSE2 2x4c8 ld64 kernel: full 2x4 tile, k = 1, 10, ..., 37, with both zero points forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .a_zero_point(0).b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22772 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22773 
22774 
22775 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// SSE4.1 2x4c8 ld64 kernel: one run at m=2, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22788 
// SSE4.1 2x4c8 ld64 kernel: one run at m=2, n=4, k=8 with cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
22802 
// SSE4.1 2x4c8 ld64 kernel: k=8 for every (m, n) with m in 1..2 and n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    for (uint32_t m_val = 1; m_val < 3; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22820 
// SSE4.1 2x4c8 ld64 kernel: k=8, n=4, with m swept over 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val < 3; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22836 
// SSE4.1 2x4c8 ld64 kernel: k=8, m=2, with n swept over 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22852 
// SSE4.1 2x4c8 ld64 kernel: full 2x4 tile with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22867 
// SSE4.1 2x4c8 ld64 kernel: k in 1..7 crossed with every (m, n) up to 2x4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
22887 
// SSE4.1 2x4c8 ld64 kernel: full 2x4 tile with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22902 
// SSE4.1 2x4c8 ld64 kernel: k in 9..15 crossed with every (m, n) up to 2x4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
22922 
// SSE4.1 2x4c8 ld64 kernel: full 2x4 tile with k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val < 81; k_val += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
22937 
// SSE4.1 2x4c8 ld64 kernel: k = 16, 24, ..., 80 crossed with every (m, n) up to 2x4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val < 81; k_val += 8) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
22957 
// SSE4.1 2x4c8 ld64 kernel: n in 5..7 with k = 1, 10, ..., 37 at m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22974 
// SSE4.1 2x4c8 ld64 kernel: n in 5..7 with k = 1, 10, ..., 37 at m=2 and cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
22992 
// SSE4.1 2x4c8 ld64 kernel: n in 5..7, k = 1, 10, ..., 37, and m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23012 
// SSE4.1 2x4c8 ld64 kernel: n in {8, 12} with k = 1, 10, ..., 37 at m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23029 
// SSE4.1 2x4c8 ld64 kernel: n in {8, 12} with k = 1, 10, ..., 37 at m=2 and cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23047 
// SSE4.1 2x4c8 ld64 kernel: n in {8, 12}, k = 1, 10, ..., 37, and m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23067 
// SSE4.1 2x4c8 ld64 kernel: full 2x4 tile, k = 1, 10, ..., 37, with ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val).ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23083 
// SSE4.1 2x4c8 ld64 kernel: ks=3 with k = 1, 10, ..., 37 and every (m, n) up to 2x4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val).ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23104 
// SSE4.1 2x4c8 ld64 kernel: n in 5..7, k = 1, 10, ..., 37, with ks=3 and m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val).ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23122 
// SSE4.1 2x4c8 ld64 kernel: n in {8, 12}, k = 1, 10, ..., 37, with ks=3 and m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val).ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23140 
// SSE4.1 2x4c8 ld64 kernel: every (m, n) up to 2x4 for k = 1, 10, ..., 37, with cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7).iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23161 
// SSE4.1 2x4c8 ld64 kernel: full 2x4 tile, k = 1, 10, ..., 37, with ks=3 and a_offset=83.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val).ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23178 
// SSE4.1 2x4c8 ld64 kernel: same setup as a_offset, additionally sweeping zero_index over {0, 1}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(k_val).ks(3)
          .a_offset(83).zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23198 
// SSE4.1 2x4c8 ld64 kernel: single 2x4 run at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23212 
// SSE4.1 2x4c8 ld64 kernel: single 2x4 run at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23226 
// SSE4.1 2x4c8 ld64 kernel: single 2x4 run at k=8 with cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23240 
// SSE4.1 2x4c8 ld64 kernel: full 2x4 tile, k = 1, 10, ..., 37, with a_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23256 
// SSE4.1 2x4c8 ld64 kernel: full 2x4 tile, k = 1, 10, ..., 37, with b_zero_point forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23272 
// SSE4.1 2x4c8 ld64 kernel: full 2x4 tile, k = 1, 10, ..., 37, with both zero points forced to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 41; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .a_zero_point(0).b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23289 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23290 
23291 
23292 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// AVX 2x4c8 ld64 kernel: one run at m=2, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23305 
// AVX 2x4c8 ld64 kernel: one run at m=2, n=4, k=8 with cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23319 
// AVX 2x4c8 ld64 kernel: k=8 for every (m, n) with m in 1..2 and n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    for (uint32_t m_val = 1; m_val < 3; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23337 
// AVX 2x4c8 ld64 kernel: k=8, n=4, with m swept over 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val < 3; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23353 
// AVX 2x4c8 ld64 kernel: k=8, m=2, with n swept over 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23369 
// AVX 2x4c8 ld64 kernel: full 2x4 tile with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23384 
// AVX 2x4c8 ld64 kernel: k in 1..7 crossed with every (m, n) up to 2x4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23404 
// AVX 2x4c8 ld64 kernel: full 2x4 tile with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23419 
// AVX 2x4c8 ld64 kernel: k in 9..15 crossed with every (m, n) up to 2x4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23439 
// AVX 2x4c8 ld64 kernel: full 2x4 tile with k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val < 81; k_val += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23454 
// AVX 2x4c8 ld64 kernel: k = 16, 24, ..., 80 crossed with every (m, n) up to 2x4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val < 81; k_val += 8) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23474 
// AVX 2x4c8 ld64 kernel: n in 5..7 with k = 1, 10, ..., 37 at m=2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23491 
// AVX 2x4c8 ld64 kernel: n in 5..7 with k = 1, 10, ..., 37 at m=2 and cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23509 
// AVX 2x4c8 ld64 kernel: n in 5..7, k = 1, 10, ..., 37, and m in 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 41; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23529 
// n_div_4: m = 2 with n = 8 and 12; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23546 
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23564 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, ..., 37, m = 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23584 
// small_kernel: m = 2, n = 4, ks = 3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23600 
// small_kernel_subtile: ks = 3 with k = 1, 10, ..., 37, n = 1..4, m = 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23621 
// n_gt_4_small_kernel: ks = 3 with n = 5..7 and k = 1, 10, ..., 37; m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23639 
// n_div_4_small_kernel: ks = 3 with n = 8 and 12 and k = 1, 10, ..., 37; m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23657 
// strided_cm_subtile: cm_stride = 7 with k = 1, 10, ..., 37, n = 1..4, m = 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23678 
// a_offset: m = 2, n = 4, ks = 3, a_offset = 83; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23695 
// zero: ks = 3, a_offset = 83, zero_index = 0 and 1; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23715 
// qmin: single m = 2, n = 4, k = 8 run with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23729 
// qmax: single m = 2, n = 4, k = 8 run with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23743 
// strided_cm: single m = 2, n = 4, k = 8 run with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23757 
// no_a_zero_point: a_zero_point = 0 with m = 2, n = 4; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23773 
// no_b_zero_point: b_zero_point = 0 with m = 2, n = 4; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23789 
// no_zero_point: a_zero_point = 0 and b_zero_point = 0 with m = 2, n = 4; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23806 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23807 
23808 
23809 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 2x4c8 XOP LD64 kernel with m = 2, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23822 
// strided_cn: single m = 2, n = 4, k = 8 run with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23836 
// k_eq_8_subtile: k = 8 with n = 1..4 and m = 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23854 
// k_eq_8_subtile_m: k = 8, n = 4, with m = 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23870 
// k_eq_8_subtile_n: k = 8, m = 2, with n = 1..4; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23886 
// k_lt_8: m = 2, n = 4, with k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23901 
// k_lt_8_subtile: k = 1..7, n = 1..4, m = 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23921 
// k_gt_8: m = 2, n = 4, with k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23936 
// k_gt_8_subtile: k = 9..15, n = 1..4, m = 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23956 
// k_div_8: m = 2, n = 4, with k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23971 
// k_div_8_subtile: k = 16, 24, ..., 80, n = 1..4, m = 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23991 
// n_gt_4: m = 2 with n = 5, 6, 7; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24008 
// n_gt_4_strided_cn: n = 5..7, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24026 
// n_gt_4_subtile: n = 5..7, k = 1, 10, ..., 37, m = 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24046 
// n_div_4: m = 2 with n = 8 and 12; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24063 
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24081 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, ..., 37, m = 1..2; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24101 
// small_kernel: m = 2, n = 4, ks = 3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24117 
// small_kernel_subtile: ks = 3 with k = 1, 10, ..., 37, n = 1..4, m = 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24138 
// n_gt_4_small_kernel: ks = 3 with n = 5..7 and k = 1, 10, ..., 37; m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24156 
// n_div_4_small_kernel: ks = 3 with n = 8 and 12 and k = 1, 10, ..., 37; m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24174 
// strided_cm_subtile: cm_stride = 7 with k = 1, 10, ..., 37, n = 1..4, m = 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24195 
// a_offset: m = 2, n = 4, ks = 3, a_offset = 83; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24212 
// zero: ks = 3, a_offset = 83, zero_index = 0 and 1; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24232 
// qmin: single m = 2, n = 4, k = 8 run with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24246 
// qmax: single m = 2, n = 4, k = 8 run with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24260 
// strided_cm: single m = 2, n = 4, k = 8 run with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24274 
// no_a_zero_point: a_zero_point = 0 with m = 2, n = 4; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24290 
// no_b_zero_point: b_zero_point = 0 with m = 2, n = 4; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24306 
// no_zero_point: a_zero_point = 0 and b_zero_point = 0 with m = 2, n = 4; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24323 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24324 
24325 
24326 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 3x4c8 AVX LD64 kernel with m = 3, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24339 
// strided_cn: single m = 3, n = 4, k = 8 run with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24353 
// k_eq_8_subtile: k = 8 with n = 1..4 and m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24371 
// k_eq_8_subtile_m: k = 8, n = 4, with m = 1..3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24387 
// k_eq_8_subtile_n: k = 8, m = 3, with n = 1..4; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24403 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
  // m=3, n=4; k takes every value from 1 through 7.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24418 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
  // k over 1..7, then n over 1..4, then m over 1..3 (innermost); iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24438 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
  // m=3, n=4; k takes every value from 9 through 15.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24453 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
  // k over 9..15, then n over 1..4, then m over 1..3 (innermost); iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24473 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
  // m=3, n=4; k steps through 16, 24, ..., 80 (increments of 8).
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24488 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
  // k over 16..80 in steps of 8, then n over 1..4, then m over 1..3; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24508 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
  // m=3; n over 5..7 (outer) and k over 1, 10, ..., 37 (inner, step 9).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24525 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
  // m=3, cn_stride(7); n over 5..7 (outer) and k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24543 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
  // n over 5..7, then k over 1..40 in steps of 9, then m over 1..3; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24563 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
  // m=3; n takes 8 and 12 (outer), k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24580 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
  // m=3, cn_stride(7); n takes 8 and 12 (outer), k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24598 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
  // n takes 8 and 12, then k over 1..40 in steps of 9, then m over 1..3; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24618 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel) {
  // m=3, n=4, ks(3); k over 1..40 in steps of 9.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24634 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel_subtile) {
  // ks(3), iterations(1); k over 1..40 in steps of 9, then n over 1..4, then m over 1..3.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24655 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
  // m=3, ks(3); n over 5..7 (outer), k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24673 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_small_kernel) {
  // m=3, ks(3); n takes 8 and 12 (outer), k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24691 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
  // cm_stride(7), iterations(1); k over 1..40 in steps of 9, then n over 1..4, then m over 1..3.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24712 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, a_offset) {
  // m=3, n=4, ks(3), a_offset(127); k over 1..40 in steps of 9.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24729 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, zero) {
  // ks(3), a_offset(127); k over 1..40 in steps of 9 (outer), zero_index over 0..2 (inner).
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24749 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
  // Single run: m=3, n=4, k=8 with qmin set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24763 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
  // Single run: m=3, n=4, k=8 with qmax set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24777 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
  // Single run: m=3, n=4, k=8 with cm_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24791 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_a_zero_point) {
  // m=3, n=4, a_zero_point(0); k over 1..40 in steps of 9.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24807 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_b_zero_point) {
  // m=3, n=4, b_zero_point(0); k over 1..40 in steps of 9.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24823 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_zero_point) {
  // m=3, n=4, both a_zero_point and b_zero_point set to 0; k over 1..40 in steps of 9.
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24840 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24841 
24842 
24843 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
  // Single run: m=3, n=4, k=8 with no extra tester options.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24856 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
  // Single run: m=3, n=4, k=8 with cn_stride set to 7.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
24870 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
  // k fixed at 8; sweeps n over 1..4 (outer) and m over 1..3 (inner), iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
24888 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
  // n=4 and k=8 stay fixed; only m varies over 1..3, iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24904 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
  // m=3 and k=8 stay fixed; only n varies over 1..4, iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24920 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
  // m=3, n=4; k takes every value from 1 through 7.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24935 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
  // k over 1..7, then n over 1..4, then m over 1..3 (innermost); iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24955 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
  // m=3, n=4; k takes every value from 9 through 15.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
24970 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
  // k over 9..15, then n over 1..4, then m over 1..3 (innermost); iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
24990 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
  // m=3, n=4; k steps through 16, 24, ..., 80 (increments of 8).
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25005 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
  // k over 16..80 in steps of 8, then n over 1..4, then m over 1..3; iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25025 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
  // m=3; n over 5..7 (outer) and k over 1, 10, ..., 37 (inner, step 9).
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25042 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
  // m=3, cn_stride(7); n over 5..7 (outer) and k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25060 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
  // n over 5..7, then k over 1..40 in steps of 9, then m over 1..3; iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25080 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
  // m=3; n takes 8 and 12 (outer), k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25097 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
  // m=3, cn_stride(7); n takes 8 and 12 (outer), k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25115 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
  // n takes 8 and 12, then k over 1..40 in steps of 9, then m over 1..3; iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25135 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
  // m=3, n=4, ks(3); k over 1..40 in steps of 9.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25151 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
  // ks(3), iterations(1); k over 1..40 in steps of 9, then n over 1..4, then m over 1..3.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25172 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
  // m=3, ks(3); n over 5..7 (outer), k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25190 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
  // m=3, ks(3); n takes 8 and 12 (outer), k over 1..40 in steps of 9 (inner).
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25208 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
  // cm_stride(7), iterations(1); k over 1..40 in steps of 9, then n over 1..4, then m over 1..3.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
25229 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
  // m=3, n=4, ks(3), a_offset(127); k over 1..40 in steps of 9.
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
25246 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
  // ks(3), a_offset(127); k over 1..40 in steps of 9 (outer), zero_index over 0..2 (inner).
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
25266 
// qmin: single full 3x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25280 
// qmax: single full 3x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25294 
// strided_cm: single full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25308 
// no_a_zero_point: full 3x4 tile for k = 1, 10, 19, 28, 37 with
// a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25324 
// no_b_zero_point: full 3x4 tile for k = 1, 10, 19, 28, 37 with
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25340 
// no_zero_point: full 3x4 tile for k = 1, 10, 19, 28, 37 with both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25357 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25358 
25359 
25360 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single full 1x4 tile with k = 8 (equal to kr) for the 1x4c8
// SSE4.1 ld128 kernel.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25373 
// strided_cn: single full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25387 
// k_eq_8_subtile: k = 8 for every n in [1, 4]; the m loop covers only
// m = 1 because its bound is 1. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25405 
// k_eq_8_subtile_m: k = 8, n = 4, with m looped from 1 to 1 (a single
// value). One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25421 
// k_eq_8_subtile_n: k = 8, m = 1, with n looped over [1, 4]. One tester
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25437 
// k_lt_8: full 1x4 tile for every k in [1, 7] (below kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25452 
// k_lt_8_subtile: every k in [1, 7] crossed with n in [1, 4]; the m loop
// covers only m = 1. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25472 
// k_gt_8: full 1x4 tile for every k in [9, 15] (between kr and 2 * kr).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25487 
// k_gt_8_subtile: every k in [9, 15] crossed with n in [1, 4]; the m loop
// covers only m = 1. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25507 
// k_div_8: full 1x4 tile for k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25522 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in [1, 4]; the m loop
// covers only m = 1. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25542 
// n_gt_4: n = 5, 6, 7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37,
// with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25559 
// n_gt_4_strided_cn: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37, with
// m = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25577 
// n_gt_4_subtile: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; the m loop
// covers only m = 1. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25597 
// n_div_4: n = 8 and 12 (multiples of nr = 4) crossed with
// k = 1, 10, 19, 28, 37, with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25614 
// n_div_4_strided_cn: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, with
// m = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25632 
// n_div_4_subtile: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; the
// m loop covers only m = 1. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25652 
// small_kernel: full 1x4 tile for k = 1, 10, 19, 28, 37 with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25668 
// small_kernel_subtile: k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
// ks set to 3; the m loop covers only m = 1. One tester iteration per
// configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25689 
// n_gt_4_small_kernel: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37, with
// m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25707 
// n_div_4_small_kernel: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37,
// with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25725 
// strided_cm_subtile: k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
// cm_stride set to 7; the m loop covers only m = 1. One tester iteration per
// configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25746 
// a_offset: full 1x4 tile for k = 1, 10, 19, 28, 37 with ks set to 3 and
// a_offset set to 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25763 
// zero: full 1x4 tile for k = 1, 10, 19, 28, 37 with ks = 3 and
// a_offset = 43; the mz loop covers only zero_index 0 because its bound is 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25783 
// qmin: single full 1x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25797 
// qmax: single full 1x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25811 
// strided_cm: single full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25825 
// no_a_zero_point: full 1x4 tile for k = 1, 10, 19, 28, 37 with
// a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25841 
// no_b_zero_point: full 1x4 tile for k = 1, 10, 19, 28, 37 with
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25857 
// no_zero_point: full 1x4 tile for k = 1, 10, 19, 28, 37 with both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25874 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25875 
25876 
25877 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single full 2x4 tile with k = 8 (equal to kr) for the 2x4c8
// SSE4.1 ld128 kernel.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25890 
// strided_cn: single full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
25904 
// k_eq_8_subtile: k = 8 for every n in [1, 4] and m in [1, 2]. One tester
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25922 
// k_eq_8_subtile_m: k = 8, n = 4, with m looped over [1, 2]. One tester
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25938 
// k_eq_8_subtile_n: k = 8, m = 2, with n looped over [1, 4]. One tester
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25954 
// k_lt_8: full 2x4 tile for every k in [1, 7] (below kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
25969 
// k_lt_8_subtile: every k in [1, 7] crossed with n in [1, 4] and
// m in [1, 2]. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25989 
// k_gt_8: full 2x4 tile for every k in [9, 15] (between kr and 2 * kr).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
26004 
// k_gt_8_subtile: every k in [9, 15] crossed with n in [1, 4] and
// m in [1, 2]. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26024 
// k_div_8: full 2x4 tile for k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
26039 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in [1, 4] and
// m in [1, 2]. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26059 
// n_gt_4: n = 5, 6, 7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37,
// with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
26076 
// n_gt_4_strided_cn: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37, with
// m = 2 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
26094 
// n_gt_4_subtile: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and
// m in [1, 2]. One tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26114 
// n_div_4: n = 8 and 12 (multiples of nr = 4) crossed with
// k = 1, 10, 19, 28, 37, with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
26131 
// 2x4c8 SSE4.1 LD128: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, M = 2, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26149 
// 2x4c8 SSE4.1 LD128: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, every M in 1..2, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26169 
// 2x4c8 SSE4.1 LD128: full 2x4 tile with ks set to 3,
// K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26185 
// 2x4c8 SSE4.1 LD128: ks set to 3, K in {1, 10, 19, 28, 37}, every N in 1..4
// and M in 1..2, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26206 
// 2x4c8 SSE4.1 LD128: N in 5..7 (above nr = 4), K in {1, 10, 19, 28, 37},
// M = 2, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26224 
// 2x4c8 SSE4.1 LD128: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, M = 2, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26242 
// 2x4c8 SSE4.1 LD128: cm_stride set to 7, K in {1, 10, 19, 28, 37},
// every N in 1..4 and M in 1..2, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26263 
// 2x4c8 SSE4.1 LD128: full 2x4 tile, ks set to 3 and a_offset set to 83,
// K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26280 
// 2x4c8 SSE4.1 LD128: full 2x4 tile, ks set to 3 and a_offset set to 83,
// K in {1, 10, 19, 28, 37}, with zero_index taking each value in 0..1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26300 
// 2x4c8 SSE4.1 LD128: full 2x4 tile at K = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26314 
// 2x4c8 SSE4.1 LD128: full 2x4 tile at K = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26328 
// 2x4c8 SSE4.1 LD128: full 2x4 tile at K = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26342 
// 2x4c8 SSE4.1 LD128: full 2x4 tile with a_zero_point set to 0,
// K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26358 
// 2x4c8 SSE4.1 LD128: full 2x4 tile with b_zero_point set to 0,
// K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26374 
// 2x4c8 SSE4.1 LD128: full 2x4 tile with both a_zero_point and b_zero_point
// set to 0, K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26391 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26392 
26393 
26394 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 SSE2 LD128: full 3x4 tile at K = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26407 
// 3x4c8 SSE2 LD128: full 3x4 tile at K = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26421 
// 3x4c8 SSE2 LD128: K = 8 with every N in 1..4 and M in 1..3,
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26439 
// 3x4c8 SSE2 LD128: K = 8 and N = 4 with every M in 1..3,
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26455 
// 3x4c8 SSE2 LD128: K = 8 and M = 3 with every N in 1..4,
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26471 
// 3x4c8 SSE2 LD128: full 3x4 tile with K in 1..7 (below kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26486 
// 3x4c8 SSE2 LD128: K in 1..7 (below kr = 8) with every N in 1..4 and
// M in 1..3, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26506 
// 3x4c8 SSE2 LD128: full 3x4 tile with K in 9..15 (above kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26521 
// 3x4c8 SSE2 LD128: K in 9..15 (above kr = 8) with every N in 1..4 and
// M in 1..3, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26541 
// 3x4c8 SSE2 LD128: full 3x4 tile with K stepping by 8 from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26556 
// 3x4c8 SSE2 LD128: K stepping by 8 from 16 through 80 with every N in 1..4
// and M in 1..3, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26576 
// 3x4c8 SSE2 LD128: N in 5..7 (above nr = 4), K in {1, 10, 19, 28, 37},
// M = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26593 
// 3x4c8 SSE2 LD128: N in 5..7 (above nr = 4), K in {1, 10, 19, 28, 37},
// M = 3, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26611 
// 3x4c8 SSE2 LD128: N in 5..7 (above nr = 4), K in {1, 10, 19, 28, 37},
// every M in 1..3, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26631 
// 3x4c8 SSE2 LD128: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, M = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26648 
// 3x4c8 SSE2 LD128: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, M = 3, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26666 
// 3x4c8 SSE2 LD128: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, every M in 1..3, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26686 
// 3x4c8 SSE2 LD128: full 3x4 tile with ks set to 3,
// K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26702 
// 3x4c8 SSE2 LD128: ks set to 3, K in {1, 10, 19, 28, 37}, every N in 1..4
// and M in 1..3, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26723 
// 3x4c8 SSE2 LD128: N in 5..7 (above nr = 4), K in {1, 10, 19, 28, 37},
// M = 3, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26741 
// 3x4c8 SSE2 LD128: N in {8, 12} (multiples of nr = 4),
// K in {1, 10, 19, 28, 37}, M = 3, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26759 
// 3x4c8 SSE2 LD128: cm_stride set to 7, K in {1, 10, 19, 28, 37},
// every N in 1..4 and M in 1..3, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26780 
// 3x4c8 SSE2 LD128: full 3x4 tile, ks set to 3 and a_offset set to 127,
// K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26797 
// 3x4c8 SSE2 LD128: full 3x4 tile, ks set to 3 and a_offset set to 127,
// K in {1, 10, 19, 28, 37}, with zero_index taking each value in 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26817 
// 3x4c8 SSE2 LD128: full 3x4 tile at K = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26831 
// 3x4c8 SSE2 LD128: full 3x4 tile at K = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26845 
// 3x4c8 SSE2 LD128: full 3x4 tile at K = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26859 
// 3x4c8 SSE2 LD128: full 3x4 tile with a_zero_point set to 0,
// K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26875 
// 3x4c8 SSE2 LD128: full 3x4 tile with b_zero_point set to 0,
// K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26891 
// 3x4c8 SSE2 LD128: full 3x4 tile with both a_zero_point and b_zero_point
// set to 0, K in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26908 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26909 
26910 
26911 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c8 AVX LD128: full 1x4 tile at K = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26924 
// 1x4c8 AVX LD128: full 1x4 tile at K = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
26938 
// 1x4c8 AVX LD128: K = 8 with every N in 1..4; the M loop covers only
// M = 1 because mr is 1. iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
26956 
// 1x4c8 AVX LD128: K = 8 and N = 4; the M loop covers only M = 1 because
// mr is 1. iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26972 
// 1x4c8 AVX LD128: K = 8 and M = 1 with every N in 1..4,
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
26988 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile with every k below kr (k = 1..7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27003 
// 1x4c8 AVX LD128 kernel: for each k in 1..7, sweeps n over 1..4 and m over
// 1..1, one tester iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27023 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile with k strictly between kr and
// 2*kr (k = 9..15).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27038 
// 1x4c8 AVX LD128 kernel: for each k in 9..15, sweeps n over 1..4 and m over
// 1..1, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27058 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile with k taking the multiples of 8
// from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27073 
// 1x4c8 AVX LD128 kernel: for each multiple-of-8 k in 16..80, sweeps n over
// 1..4 and m over 1..1, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27093 
// 1x4c8 AVX LD128 kernel with n above nr but below 2*nr (n = 5..7), m = 1,
// and k stepping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27110 
// 1x4c8 AVX LD128 kernel with n = 5..7, m = 1, k stepping 1..40 by 9, and the
// tester's cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27128 
// 1x4c8 AVX LD128 kernel with n = 5..7 and k stepping 1..40 by 9, additionally
// sweeping m over 1..1; one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27148 
// 1x4c8 AVX LD128 kernel with n a multiple of nr (n = 8, 12), m = 1, and k
// stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27165 
// 1x4c8 AVX LD128 kernel with n = 8, 12, m = 1, k stepping 1..40 by 9, and the
// tester's cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27183 
// 1x4c8 AVX LD128 kernel with n = 8, 12 and k stepping 1..40 by 9,
// additionally sweeping m over 1..1; one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27203 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile with the tester's ks set to 3 and
// k stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27219 
// 1x4c8 AVX LD128 kernel with ks = 3: for each k stepping 1..40 by 9, sweeps n
// over 1..4 and m over 1..1, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27240 
// 1x4c8 AVX LD128 kernel with ks = 3, n = 5..7, m = 1, and k stepping 1..40
// by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27258 
// 1x4c8 AVX LD128 kernel with ks = 3, n = 8, 12, m = 1, and k stepping 1..40
// by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27276 
// 1x4c8 AVX LD128 kernel with the tester's cm_stride set to 7: for each k
// stepping 1..40 by 9, sweeps n over 1..4 and m over 1..1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27297 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile with ks = 3 and the tester's
// a_offset set to 43, for k stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27314 
// 1x4c8 AVX LD128 kernel with ks = 3 and a_offset = 43: for each k stepping
// 1..40 by 9, passes every zero_index mz in [0, 1) to the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27334 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile at k = 8 with the tester's qmin
// set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27348 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile at k = 8 with the tester's qmax
// set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27362 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile at k = 8 with the tester's
// cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27376 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile with a_zero_point set to 0, for k
// stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27392 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile with b_zero_point set to 0, for k
// stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27408 
// 1x4c8 AVX LD128 kernel on a full 1x4 tile with both a_zero_point and
// b_zero_point set to 0, for k stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27425 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
27426 
27427 
27428 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c8 AVX LD128 kernel on a full 2x4 tile with k = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27441 
// 2x4c8 AVX LD128 kernel on a full 2x4 tile at k = 8 with the tester's
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27455 
// 2x4c8 AVX LD128 kernel, k fixed at 8: sweeps n over 1..4 and m over 1..2,
// running one tester iteration for each (m, n) shape.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27473 
// 2x4c8 AVX LD128 kernel, k = 8 and n = 4: sweeps only m (1..2), one tester
// iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27489 
// 2x4c8 AVX LD128 kernel, k = 8 and m = 2: sweeps only n (1..4), one tester
// iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27505 
// 2x4c8 AVX LD128 kernel on a full 2x4 tile with every k below kr (k = 1..7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27520 
// 2x4c8 AVX LD128 kernel: for each k in 1..7, sweeps n over 1..4 and m over
// 1..2, one tester iteration per (k, n, m) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27540 
// 2x4c8 AVX LD128 kernel on a full 2x4 tile with k strictly between kr and
// 2*kr (k = 9..15).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27555 
// 2x4c8 AVX LD128 kernel: for each k in 9..15, sweeps n over 1..4 and m over
// 1..2, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27575 
// 2x4c8 AVX LD128 kernel on a full 2x4 tile with k taking the multiples of 8
// from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27590 
// 2x4c8 AVX LD128 kernel: for each multiple-of-8 k in 16..80, sweeps n over
// 1..4 and m over 1..2, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27610 
// 2x4c8 AVX LD128 kernel with n above nr but below 2*nr (n = 5..7), m = 2,
// and k stepping 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27627 
// 2x4c8 AVX LD128 kernel with n = 5..7, m = 2, k stepping 1..40 by 9, and the
// tester's cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27645 
// 2x4c8 AVX LD128 kernel with n = 5..7 and k stepping 1..40 by 9, additionally
// sweeping m over 1..2; one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27665 
// 2x4c8 AVX LD128 kernel with n a multiple of nr (n = 8, 12), m = 2, and k
// stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27682 
// 2x4c8 AVX LD128 kernel with n = 8, 12, m = 2, k stepping 1..40 by 9, and the
// tester's cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27700 
// 2x4c8 AVX LD128 kernel with n = 8, 12 and k stepping 1..40 by 9,
// additionally sweeping m over 1..2; one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27720 
// 2x4c8 AVX LD128 kernel on a full 2x4 tile with the tester's ks set to 3 and
// k stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27736 
// 2x4c8 AVX LD128 kernel with ks = 3: for each k stepping 1..40 by 9, sweeps n
// over 1..4 and m over 1..2, one tester iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27757 
// 2x4c8 AVX LD128 kernel with ks = 3, n = 5..7, m = 2, and k stepping 1..40
// by 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27775 
// 2x4c8 AVX LD128 kernel with ks = 3, n = 8, 12, m = 2, and k stepping 1..40
// by 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27793 
// 2x4c8 AVX LD128 kernel with the tester's cm_stride set to 7: for each k
// stepping 1..40 by 9, sweeps n over 1..4 and m over 1..2, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
27814 
// 2x4c8 AVX LD128 kernel on a full 2x4 tile with ks = 3 and the tester's
// a_offset set to 83, for k stepping 1..40 by 9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
27831 
// 2x4c8 AVX LD128 kernel with ks = 3 and a_offset = 83: for each k stepping
// 1..40 by 9, passes every zero_index mz in [0, 2) to the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
27851 
// 2x4c8 AVX LD128 kernel on a full 2x4 tile at k = 8 with the tester's qmin
// set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
27865 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  // One full 2x4 tile at k=8 with the tester's qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27879 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // One full 2x4 tile at k=8 with cm_stride=7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27893 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile with a_zero_point=0, for k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27909 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile with b_zero_point=0, for k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27925 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile with both a_zero_point and b_zero_point set to 0,
  // for k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
27942 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
27943 
27944 
27945 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // One full 3x4 tile at k=8.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27958 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // One full 3x4 tile at k=8 with cn_stride=7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
27972 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every n in [1, 4] by m in [1, 3] sub-tile at k=8, with iterations(1).
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27990 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // m swept over [1, 3] with n fixed at 4 and k=8, with iterations(1).
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28006 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // n swept over [1, 4] with m fixed at 3 and k=8, with iterations(1).
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28022 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for every k in [1, 7].
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28037 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k in [1, 7], n in [1, 4], m in [1, 3]; each combination with iterations(1).
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28057 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for every k in [9, 15].
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28072 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k in [9, 15], n in [1, 4], m in [1, 3]; each combination with iterations(1).
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28092 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28107 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 16, 24, ..., 80; n in [1, 4]; m in [1, 3]; each with iterations(1).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28127 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n in [5, 7] (above nr=4) with m=3, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28144 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in [5, 7] with m=3 and cn_stride=7, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28162 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in [5, 7], k in {1, 10, 19, 28, 37}, m in [1, 3]; each with iterations(1).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28182 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12} (multiples of nr=4) with m=3, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28199 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12} with m=3 and cn_stride=7, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28217 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 3]; each with iterations(1).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28237 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with ks=3, for k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28253 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  // ks=3 across k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 3];
  // each combination with iterations(1).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28274 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n in [5, 7] with m=3 and ks=3, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28292 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12} with m=3 and ks=3, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28310 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // cm_stride=7 across k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 3];
  // each combination with iterations(1).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28331 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with ks=3 and a_offset=127, for k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28348 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  // Same setup as the a_offset case, additionally sweeping zero_index over
  // {0, 1, 2} for each k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(depth);
      tester.ks(3).a_offset(127).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28368 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  // One full 3x4 tile at k=8 with the tester's qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
28382 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  // One full 3x4 tile at k=8 with the tester's qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
28396 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // One full 3x4 tile at k=8 with cm_stride=7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
28410 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with a_zero_point=0, for k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28426 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with b_zero_point=0, for k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28442 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with both a_zero_point and b_zero_point set to 0,
  // for k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
28459 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
28460 
28461 
28462 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // One full 1x8 tile at k=8.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
28475 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // One full 1x8 tile at k=8 with cn_stride=11.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
28489 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // Every n in [1, 8] at k=8 with iterations(1); the m loop covers only m=1
  // because mr is 1.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28507 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  // m loop (only m=1 here) with n fixed at 8 and k=8, with iterations(1).
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28523 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  // n swept over [1, 8] with m fixed at 1 and k=8, with iterations(1).
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28539 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Full 1x8 tile for every k in [1, 7].
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28554 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in [1, 7], n in [1, 8], m loop covering only m=1; each with iterations(1).
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28574 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Full 1x8 tile for every k in [9, 15].
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28589 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in [9, 15], n in [1, 8], m loop covering only m=1; each with iterations(1).
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28609 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // Full 1x8 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28624 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k = 16, 24, ..., 80; n in [1, 8]; m loop covering only m=1;
  // each combination with iterations(1).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28644 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // n in [9, 15] (above nr=8) with m=1, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28661 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // n in [9, 15] with m=1 and cn_stride=11, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28679 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n in [9, 15], k in {1, 10, 19, 28, 37}, m loop covering only m=1;
  // each combination with iterations(1).
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28699 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // n in {16, 24} (multiples of nr=8) with m=1, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28716 
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // n in {16, 24} with m=1 and cn_stride=11, for k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28734 
// n_div_8_subtile: n in {16, 24}, k in {1, 10, 19, 28, 37}, m = 1 (loop over 1..mr),
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28754 
// small_kernel: full 1x8 tile with ks(3) on the tester, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28770 
// small_kernel_subtile: ks(3) with every n in 1..8 and m = 1, for k in {1, 10, 19, 28, 37};
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 9; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28791 
// n_gt_8_small_kernel: ks(3) with n = 9..15 and k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28809 
// n_div_8_small_kernel: ks(3) with n in {16, 24} and k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28827 
// strided_cm_subtile: cm_stride(11) with every n in 1..8 and m = 1, for
// k in {1, 10, 19, 28, 37}; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 9; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28848 
// a_offset: full 1x8 tile with ks(3) and a_offset(43), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28865 
// zero: like a_offset (ks(3), a_offset(43), k in {1, 10, 19, 28, 37}) but additionally
// sets zero_index to each value in 0..mr-1, which is just 0 for this kernel.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28885 
// qmin: single full 1x8 tile at k = 8 with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
28899 
// qmax: single full 1x8 tile at k = 8 with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
28913 
// strided_cm: single full 1x8 tile at k = 8 with cm_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
28927 
// no_a_zero_point: full 1x8 tile with a_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_val)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28943 
// no_b_zero_point: full 1x8 tile with b_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_val)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28959 
// no_zero_point: full 1x8 tile with both a_zero_point(0) and b_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
28976 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
28977 
28978 
28979 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single full 1x16 tile at k = 8 for the 1x16c8 AVX512SKX kernel.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
28992 
// strided_cn: single full 1x16 tile at k = 8 with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29006 
// k_eq_8_subtile: k = 8 with every n in 1..16 and m = 1; one tester iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 1; n_val < 17; ++n_val) {
    for (uint32_t m_val = 1; m_val < 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29024 
// k_eq_8_subtile_m: k = 8, n = 16, with m looping over 1..mr (only 1 here);
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_val = 1; m_val < 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(m_val).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29040 
// k_eq_8_subtile_n: k = 8, m = 1, with n looping over 1..16; one tester iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 1; n_val < 17; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29056 
// k_lt_8: full 1x16 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29071 
// k_lt_8_subtile: every k in 1..7 combined with every n in 1..16 and m = 1;
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val < 17; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29091 
// k_gt_8: full 1x16 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29106 
// k_gt_8_subtile: every k in 9..15 combined with every n in 1..16 and m = 1;
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val < 17; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29126 
// k_div_8: full 1x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29141 
// k_div_8_subtile: k = 16, 24, ..., 80 combined with every n in 1..16 and m = 1;
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val < 17; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29161 
// n_gt_16: n = 17..31 with k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29178 
// n_gt_16_strided_cn: n = 17..31 with k in {1, 10, 19, 28, 37}, m fixed at 1,
// and cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29196 
// n_gt_16_subtile: n = 17..31, k in {1, 10, 19, 28, 37}, m = 1 (loop over 1..mr);
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29216 
// n_div_16: n in {32, 48} with k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29233 
// n_div_16_strided_cn: n in {32, 48} with k in {1, 10, 19, 28, 37}, m fixed at 1,
// and cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29251 
// n_div_16_subtile: n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 1 (loop over 1..mr);
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29271 
// small_kernel: full 1x16 tile with ks(3) on the tester, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_val)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29287 
// small_kernel_subtile: ks(3) with every n in 1..16 and m = 1, for
// k in {1, 10, 19, 28, 37}; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 17; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29308 
// n_gt_16_small_kernel: ks(3) with n = 17..31 and k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29326 
// n_div_16_small_kernel: ks(3) with n in {32, 48} and k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29344 
// strided_cm_subtile: cm_stride(19) with every n in 1..16 and m = 1, for
// k in {1, 10, 19, 28, 37}; one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val < 17; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29365 
// a_offset: full 1x16 tile with ks(3) and a_offset(43), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29382 
// zero: like a_offset (ks(3), a_offset(43), k in {1, 10, 19, 28, 37}) but additionally
// sets zero_index to each value in 0..mr-1, which is just 0 for this kernel.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(16).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29402 
// qmin: single full 1x16 tile at k = 8 with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29416 
// qmax: single full 1x16 tile at k = 8 with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29430 
// strided_cm: single full 1x16 tile at k = 8 with cm_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29444 
// no_a_zero_point: full 1x16 tile with a_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_val)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29460 
// no_b_zero_point: full 1x16 tile with b_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_val)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29476 
// no_zero_point: full 1x16 tile with both a_zero_point(0) and b_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29493 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29494 
29495 
29496 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single full 2x16 tile at k = 8 for the 2x16c8 AVX512SKX kernel.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29509 
// strided_cn: single full 2x16 tile at k = 8 with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(8).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29523 
// k_eq_8_subtile: k = 8 with every n in 1..16 and every m in 1..2; one tester
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 1; n_val < 17; ++n_val) {
    for (uint32_t m_val = 1; m_val < 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29541 
// k_eq_8_subtile_m: k = 8, n = 16, with m looping over 1..2; one tester iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_val = 1; m_val < 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(m_val).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29557 
// k_eq_8_subtile_n: k = 8, m = 2, with n looping over 1..16; one tester iteration
// per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_val = 1; n_val < 17; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29573 
// k_lt_8: full 2x16 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(k_val)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29588 
// k_lt_8_subtile: every k in 1..7 combined with every n in 1..16 and every m in 1..2;
// one tester iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val < 17; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29608 
// Runs the 2x16c8 AVX512SKX kernel on a full 2x16 output for every k in
// [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29623 
// Runs the 2x16c8 AVX512SKX kernel for every combination of k in [9, 15],
// n in [1, 16] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29643 
// Runs the 2x16c8 AVX512SKX kernel on a full 2x16 output for k = 16, 24,
// ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29658 
// Runs the 2x16c8 AVX512SKX kernel for k = 16, 24, ..., 80 combined with
// every n in [1, 16] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29678 
// Runs the 2x16c8 AVX512SKX kernel with m = 2 for every n in [17, 31] and
// k = 1, 10, ..., 37 (step 9).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29695 
// Same sweep as n in [17, 31] x k = 1, 10, ..., 37 with m = 2, but with
// cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29713 
// Runs the 2x16c8 AVX512SKX kernel for every n in [17, 31], k = 1, 10, ...,
// 37 and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29733 
// Runs the 2x16c8 AVX512SKX kernel with m = 2 for n = 32 and 48, each with
// k = 1, 10, ..., 37 (step 9).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29750 
// Runs the 2x16c8 AVX512SKX kernel with m = 2 for n = 32 and 48, each with
// k = 1, 10, ..., 37, and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29768 
// Runs the 2x16c8 AVX512SKX kernel for n = 32 and 48, k = 1, 10, ..., 37 and
// every m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29788 
// Runs the 2x16c8 AVX512SKX kernel on a full 2x16 output with ks = 3 for
// k = 1, 10, ..., 37 (step 9).
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29804 
// Runs the 2x16c8 AVX512SKX kernel with ks = 3 for k = 1, 10, ..., 37
// combined with every n in [1, 16] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29825 
// Runs the 2x16c8 AVX512SKX kernel with m = 2 and ks = 3 for every n in
// [17, 31] and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29843 
// Runs the 2x16c8 AVX512SKX kernel with m = 2 and ks = 3 for n = 32 and 48,
// each with k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29861 
// Runs the 2x16c8 AVX512SKX kernel with cm_stride = 19 for k = 1, 10, ...,
// 37 combined with every n in [1, 16] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29882 
// Runs the 2x16c8 AVX512SKX kernel on a full 2x16 output with ks = 3 and
// a_offset = 83 for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(depth);
    tester.ks(3);
    tester.a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29899 
// Runs the 2x16c8 AVX512SKX kernel with ks = 3 and a_offset = 83 for
// k = 1, 10, ..., 37, setting zero_index to each value in [0, 1].
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(8).sr(1);
      tester.m(2).n(16).k(depth);
      tester.ks(3);
      tester.a_offset(83);
      tester.zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29919 
// Runs the 2x16c8 AVX512SKX kernel once on a 2x16 output at k = 8 with
// qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29933 
// Runs the 2x16c8 AVX512SKX kernel once on a 2x16 output at k = 8 with
// qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29947 
// Runs the 2x16c8 AVX512SKX kernel once on a 2x16 output at k = 8 with
// cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(8).sr(1);
  tester.m(2).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
29961 
// Runs the 2x16c8 AVX512SKX kernel on a full 2x16 output with
// a_zero_point = 0 for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(depth);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29977 
// Runs the 2x16c8 AVX512SKX kernel on a full 2x16 output with
// b_zero_point = 0 for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(depth);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
29993 
// Runs the 2x16c8 AVX512SKX kernel on a full 2x16 output with both
// a_zero_point and b_zero_point set to 0 for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(8).sr(1);
    tester.m(2).n(16).k(depth);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30010 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
30011 
30012 
30013 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x16c8 AVX512SKX kernel once on a full 4x16 output at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
30026 
// Runs the 4x16c8 AVX512SKX kernel once on a full 4x16 output at k = 8 with
// cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
30040 
// Runs the 4x16c8 AVX512SKX kernel at k = 8 for every n in [1, 16] and
// m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30058 
// Runs the 4x16c8 AVX512SKX kernel for every m in [1, 4] with n = 16 and
// k = 8, using a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(rows).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30074 
// Runs the 4x16c8 AVX512SKX kernel for every n in [1, 16] with m = 4 and
// k = 8, using a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30090 
// Runs the 4x16c8 AVX512SKX kernel on a full 4x16 output for every k in
// [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30105 
// Runs the 4x16c8 AVX512SKX kernel for every combination of k in [1, 7],
// n in [1, 16] and m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30125 
// Runs the 4x16c8 AVX512SKX kernel on a full 4x16 output for every k in
// [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30140 
// Runs the 4x16c8 AVX512SKX kernel for every combination of k in [9, 15],
// n in [1, 16] and m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30160 
// Runs the 4x16c8 AVX512SKX kernel on a full 4x16 output for k = 16, 24,
// ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30175 
// Runs the 4x16c8 AVX512SKX kernel for k = 16, 24, ..., 80 combined with
// every n in [1, 16] and m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30195 
// Runs the 4x16c8 AVX512SKX kernel with m = 4 for every n in [17, 31] and
// k = 1, 10, ..., 37 (step 9).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30212 
// Runs the 4x16c8 AVX512SKX kernel with m = 4 and cn_stride = 19 for every
// n in [17, 31] and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30230 
// Runs the 4x16c8 AVX512SKX kernel for every n in [17, 31], k = 1, 10, ...,
// 37 and m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30250 
// Runs the 4x16c8 AVX512SKX kernel with m = 4 for n = 32 and 48, each with
// k = 1, 10, ..., 37 (step 9).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30267 
// Runs the 4x16c8 AVX512SKX kernel with m = 4 and cn_stride = 19 for n = 32
// and 48, each with k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30285 
// Runs the 4x16c8 AVX512SKX kernel for n = 32 and 48, k = 1, 10, ..., 37 and
// every m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30305 
// Runs the 4x16c8 AVX512SKX kernel on a full 4x16 output with ks = 3 for
// k = 1, 10, ..., 37 (step 9).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30321 
// Runs the 4x16c8 AVX512SKX kernel with ks = 3 for k = 1, 10, ..., 37
// combined with every n in [1, 16] and m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30342 
// Runs the 4x16c8 AVX512SKX kernel with m = 4 and ks = 3 for every n in
// [17, 31] and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30360 
// Runs the 4x16c8 AVX512SKX kernel with m = 4 and ks = 3 for n = 32 and 48,
// each with k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30378 
// Runs the 4x16c8 AVX512SKX kernel with cm_stride = 19 for k = 1, 10, ...,
// 37 combined with every n in [1, 16] and m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qu8_conv_minmax_fp32_avx512_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
30399 
// Runs the 4x16c8 AVX512SKX kernel on a full 4x16 output with ks = 3 and
// a_offset = 163 for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(depth);
    tester.ks(3);
    tester.a_offset(163);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
30416 
// Runs the 4x16c8 AVX512SKX kernel with ks = 3 and a_offset = 163 for
// k = 1, 10, ..., 37, setting zero_index to each value in [0, 3].
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(16).k(depth);
      tester.ks(3);
      tester.a_offset(163);
      tester.zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
30436 
// Runs the 4x16c8 AVX512SKX kernel once on a 4x16 output at k = 8 with
// qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
30450 
// Full 4x16 tile at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
    xnn_init_qu8_conv_minmax_fp32_avx512_params,
    xnn_qu8_requantize_fp32);
}
30464 
// Full 4x16 tile at k=8 with cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
    xnn_init_qu8_conv_minmax_fp32_avx512_params,
    xnn_qu8_requantize_fp32);
}
30478 
// Full 4x16 tile with a_zero_point=0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(kc);
    tester.a_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
  }
}
30494 
// Full 4x16 tile with b_zero_point=0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(kc);
    tester.b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
  }
}
30510 
// Full 4x16 tile with both a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
  }
}
30527 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
30528 
30529 
30530 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 1x4 tile at k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
    xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
    xnn_qu8_requantize_fp32);
}
30542 
// Full 1x4 tile at k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
    xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
    xnn_qu8_requantize_fp32);
}
30555 
// k=8 over every n in 1..4 and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; nc++) {
    for (uint32_t mc = 1; mc <= 1; mc++) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
30572 
// k=8, n=4, over every m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; mc++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30587 
// k=8, m=1, over every n in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; nc++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30602 
// Full 1x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; kc++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30616 
// Every k in 1..7 crossed with n in 1..4 and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
30635 
// Full 1x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; kc++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30649 
// Every k in 9..15 crossed with n in 1..4 and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; kc++) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
30668 
// Full 1x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30682 
// k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
30701 
// m=1 with n in 5..7, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
30717 
// m=1 with n in 5..7 and cn_stride=7, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
30734 
// n in 5..7 crossed with k = 1, 10, 19, 28, 37 and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
30753 
// m=1 with n = 8 and 12, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
30769 
// m=1 with n = 8 and 12 and cn_stride=7, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
30786 
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
30805 
// Full 1x4 tile with ks=3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30820 
// ks=3 with k = 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
30840 
// m=1 with n in 5..7 and ks=3, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
30857 
// m=1 with n = 8 and 12 and ks=3, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
30874 
// cm_stride=7 with k = 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
30894 
// Full 1x4 tile with ks=3 and a_offset=43, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30910 
// Full 1x4 tile with ks=3, a_offset=43 and zero_index 0..0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; zi++) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(kc);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
30929 
// Full 1x4 tile at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
    xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
    xnn_qu8_requantize_fp32);
}
30942 
// Full 1x4 tile at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
    xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
    xnn_qu8_requantize_fp32);
}
30955 
// Full 1x4 tile at k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
    xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
    xnn_qu8_requantize_fp32);
}
30968 
// Full 1x4 tile with a_zero_point=0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30983 
// Full 1x4 tile with b_zero_point=0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
30998 
// Full 1x4 tile with both a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
31014 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31015 
31016 
31017 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 1x4 tile at k=8 (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
    xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
    xnn_qu8_requantize_fp32);
}
31029 
// Full 1x4 tile at k=8 with cn_stride set to 7 (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
    xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
    xnn_qu8_requantize_fp32);
}
31042 
// k=8 over every n in 1..4 and m in 1..1, one iteration per configuration (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; nc++) {
    for (uint32_t mc = 1; mc <= 1; mc++) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
31059 
// k=8, n=4, over every m in 1..1, one iteration per configuration (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; mc++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
31074 
// k=8, m=1, over every n in 1..4, one iteration per configuration (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; nc++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
31089 
// Full 1x4 tile for every k in 1..7 (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; kc++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
31103 
// Every k in 1..7 crossed with n in 1..4 and m in 1..1, one iteration each (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
31122 
// Full 1x4 tile for every k in 9..15 (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; kc++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
31136 
// Every k in 9..15 crossed with n in 1..4 and m in 1..1, one iteration each (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; kc++) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
31155 
// Full 1x4 tile for k = 16, 24, ..., 80 (step 8), sr=4 variant.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kc);
    tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
  }
}
31169 
// k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..1, one iteration each (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; nc++) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
31188 
// m=1 with n in 5..7, each for k = 1, 10, 19, 28, 37 (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
31204 
// m=1 with n in 5..7 and cn_stride=7, each for k = 1, 10, 19, 28, 37 (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
31221 
// n in 5..7 crossed with k = 1, 10, 19, 28, 37 and m in 1..1, one iteration each (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; nc++) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; mc++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
      }
    }
  }
}
31240 
// m=1 with n = 8 and 12, each for k = 1, 10, 19, 28, 37 (sr=4 variant).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
31256 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel with m = 1, n = 8 and 12
// (multiples of nr = 4), and k = 1, 10, 19, 28, 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31273 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel with n = 8 and 12,
// k = 1, 10, 19, 28, 37 and every m in 1..mr (a single value, m = 1, here),
// using iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31292 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 1x4 tile with
// ks set to 3 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31307 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel with ks set to 3 over
// k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..mr (only m = 1 here), using
// iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31327 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel with ks set to 3, m = 1,
// n = 5..7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31344 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel with ks set to 3, m = 1,
// n = 8 and 12, and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31361 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel with cm_stride set to 7
// over k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..mr (only m = 1 here),
// using iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31381 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 1x4 tile with
// ks set to 3 and a_offset set to 43, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31397 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel with ks set to 3 and
// a_offset set to 43, for k = 1, 10, 19, 28, 37, passing each row index
// mz in [0, mr) to zero_index (only mz = 0 here).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31416 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 1x4 tile with
// k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31429 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 1x4 tile with
// k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31442 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 1x4 tile with
// k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31455 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 1x4 tile with
// a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31470 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 1x4 tile with
// b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31485 
// Tests the 1x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 1x4 tile with
// both a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31501 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31502 
31503 
31504 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31516 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31529 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with k = 8 over every
// n = 1..4 and m = 1..2, using iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31546 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with k = 8 and n = 4 for
// m = 1..2, using iterations(1) for each m.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31561 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with k = 8 and m = 2 for
// n = 1..4, using iterations(1) for each n.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31576 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile for
// every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31590 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel for every k in 1..7,
// n = 1..4 and m = 1..2, using iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31609 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile for
// every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31623 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel for every k in 9..15,
// n = 1..4 and m = 1..2, using iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31642 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile for
// k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31656 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel for k = 16, 24, ..., 80,
// n = 1..4 and m = 1..2, using iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31675 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with m = 2, n = 5..7 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31691 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with m = 2, n = 5..7 and
// k = 1, 10, 19, 28, 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31708 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with n = 5..7,
// k = 1, 10, 19, 28, 37 and m = 1..2, using iterations(1) for each
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31727 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with m = 2, n = 8 and 12
// (multiples of nr = 4), and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31743 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with m = 2, n = 8 and 12
// (multiples of nr = 4), and k = 1, 10, 19, 28, 37, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31760 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with n = 8 and 12,
// k = 1, 10, 19, 28, 37 and m = 1..2, using iterations(1) for each
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31779 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// ks set to 3 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31794 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with ks set to 3 over
// k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2, using iterations(1) for
// each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31814 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with ks set to 3, m = 2,
// n = 5..7 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31831 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with ks set to 3, m = 2,
// n = 8 and 12, and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31848 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with cm_stride set to 7
// over k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2, using iterations(1)
// for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31868 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// ks set to 3 and a_offset set to 83, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31884 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel with ks set to 3 and
// a_offset set to 83, for k = 1, 10, 19, 28, 37, passing each row index
// mz in [0, mr) = {0, 1} to zero_index.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31903 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31916 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31929 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31942 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31957 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31972 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld64 ukernel on a full 2x4 tile with
// both a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31988 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31989 
31990 
31991 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld128 ukernel on a full 2x4 tile with
// k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
32003 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld128 ukernel on a full 2x4 tile with
// k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
32016 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld128 ukernel with k = 8 over every
// n = 1..4 and m = 1..2, using iterations(1) for each combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
32033 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld128 ukernel with k = 8 and n = 4 for
// m = 1..2, using iterations(1) for each m.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
32048 
// Tests the 2x4c2s4 wasmsimd_dot16x2_ld128 ukernel with k = 8 and m = 2 for
// n = 1..4, using iterations(1) for each n.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
32063 
// 2x4c2s4 ld128 kernel: m=2, n=4, sweeping k over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32077 
// 2x4c2s4 ld128 kernel: sweeps k over [1, 7], n over [1, 4] and m over [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32096 
// 2x4c2s4 ld128 kernel: m=2, n=4, sweeping k over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32110 
// 2x4c2s4 ld128 kernel: sweeps k over [9, 15], n over [1, 4] and m over [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32129 
// 2x4c2s4 ld128 kernel: m=2, n=4, sweeping k over 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32143 
// 2x4c2s4 ld128 kernel: sweeps k over 16, 24, ..., 80, n over [1, 4] and
// m over [1, 2], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32162 
// 2x4c2s4 ld128 kernel: m=2, n in [5, 7], k stepping 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32178 
// 2x4c2s4 ld128 kernel: m=2, n in [5, 7], k stepping 1, 10, ..., 37,
// with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32195 
// 2x4c2s4 ld128 kernel: n in [5, 7], k stepping 1, 10, ..., 37, m in [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32214 
// 2x4c2s4 ld128 kernel: m=2, n in {8, 12}, k stepping 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32230 
// 2x4c2s4 ld128 kernel: m=2, n in {8, 12}, k stepping 1, 10, ..., 37,
// with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32247 
// 2x4c2s4 ld128 kernel: n in {8, 12}, k stepping 1, 10, ..., 37, m in [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32266 
// 2x4c2s4 ld128 kernel: m=2, n=4, k stepping 1, 10, ..., 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32281 
// 2x4c2s4 ld128 kernel: k stepping 1, 10, ..., 37, n in [1, 4], m in [1, 2],
// with ks(3) and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32301 
// 2x4c2s4 ld128 kernel: m=2, n in [5, 7], k stepping 1, 10, ..., 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32318 
// 2x4c2s4 ld128 kernel: m=2, n in {8, 12}, k stepping 1, 10, ..., 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32335 
// 2x4c2s4 ld128 kernel: k stepping 1, 10, ..., 37, n in [1, 4], m in [1, 2],
// with cm_stride(7) and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32355 
// 2x4c2s4 ld128 kernel: m=2, n=4, k stepping 1, 10, ..., 37,
// with ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32371 
// 2x4c2s4 ld128 kernel: m=2, n=4, k stepping 1, 10, ..., 37, with ks(3) and
// a_offset(83); zero_index is swept over 0 and 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_dim)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32390 
// 2x4c2s4 ld128 kernel: single run at m=2, n=4, k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32403 
// 2x4c2s4 ld128 kernel: single run at m=2, n=4, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32416 
// 2x4c2s4 ld128 kernel: single run at m=2, n=4, k=8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32429 
// 2x4c2s4 ld128 kernel: m=2, n=4, k stepping 1, 10, ..., 37, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32444 
// 2x4c2s4 ld128 kernel: m=2, n=4, k stepping 1, 10, ..., 37, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32459 
// 2x4c2s4 ld128 kernel: m=2, n=4, k stepping 1, 10, ..., 37, with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32475 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32476 
32477 
32478 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 2x4c8 ld64 kernel: single run at m=2, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32490 
// 2x4c8 ld64 kernel: single run at m=2, n=4, k=8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32503 
// 2x4c8 ld64 kernel: k=8 for every n in [1, 4] and m in [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32520 
// 2x4c8 ld64 kernel: n=4, k=8, sweeping m over [1, 2] with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32535 
// 2x4c8 ld64 kernel: m=2, k=8, sweeping n over [1, 4] with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32550 
// 2x4c8 ld64 kernel: m=2, n=4, sweeping k over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32564 
// 2x4c8 ld64 kernel: sweeps k over [1, 7], n over [1, 4] and m over [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32583 
// 2x4c8 ld64 kernel: m=2, n=4, sweeping k over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32597 
// 2x4c8 ld64 kernel: sweeps k over [9, 15], n over [1, 4] and m over [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32616 
// 2x4c8 ld64 kernel: m=2, n=4, sweeping k over 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32630 
// 2x4c8 ld64 kernel: sweeps k over 16, 24, ..., 80, n over [1, 4] and
// m over [1, 2], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32649 
// 2x4c8 ld64 kernel: m=2, n in [5, 7], k stepping 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32665 
// 2x4c8 ld64 kernel: m=2, n in [5, 7], k stepping 1, 10, ..., 37,
// with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32682 
// 2x4c8 ld64 kernel: n in [5, 7], k stepping 1, 10, ..., 37, m in [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32701 
// 2x4c8 ld64 kernel: m=2, n in {8, 12}, k stepping 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32717 
// 2x4c8 ld64 kernel: m=2, n in {8, 12}, k stepping 1, 10, ..., 37,
// with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32734 
// 2x4c8 ld64 kernel: n in {8, 12}, k stepping 1, 10, ..., 37, m in [1, 2],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32753 
// 2x4c8 ld64 kernel: m=2, n=4, k stepping 1, 10, ..., 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32768 
// 2x4c8 ld64 kernel: k stepping 1, 10, ..., 37, n in [1, 4], m in [1, 2],
// with ks(3) and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32788 
// 2x4c8 ld64 kernel: m=2, n in [5, 7], k stepping 1, 10, ..., 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32805 
// 2x4c8 ld64 kernel: m=2, n in {8, 12}, k stepping 1, 10, ..., 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32822 
// 2x4c8 ld64 kernel: k stepping 1, 10, ..., 37, n in [1, 4], m in [1, 2],
// with cm_stride(7) and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32842 
// 2x4c8 ld64 kernel: m=2, n=4, k stepping 1, 10, ..., 37,
// with ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32858 
// For each k (1..40 step 9), tries zero_index 0 and 1 on a 2x4 problem with
// ks(3) and a_offset(83).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
32877 
// Single 2x4 run at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
32890 
// Single 2x4 run at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
32903 
// Single 2x4 run at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
32916 
// Sweeps k from 1 to 40 in steps of 9 on a 2x4 problem with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
32931 
// Sweeps k from 1 to 40 in steps of 9 on a 2x4 problem with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
32946 
// Sweeps k from 1 to 40 in steps of 9 on a 2x4 problem with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
32962 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32963 
32964 
32965 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 3x4 run at k = 8 on the 3x4c2 wasmsimd dot16x2 ld64 kernel.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
32977 
// Single 3x4 run at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
32990 
// At k = 8, sweeps n (1..4) and m (1..3); each configuration runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33007 
// At k = 8 and n = 4, sweeps m over 1..3 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33022 
// At k = 8 and m = 3, sweeps n over 1..4 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33037 
// Sweeps k over 1..7 on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33051 
// Sweeps k (1..7), n (1..4) and m (1..3); each configuration runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33070 
// Sweeps k over 9..15 on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33084 
// Sweeps k (9..15), n (1..4) and m (1..3); each configuration runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33103 
// Sweeps k over the multiples of 8 from 16 to 80 on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33117 
// Sweeps k (16..80 step 8), n (1..4) and m (1..3); each configuration runs
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33136 
// Sweeps n over 5..7 and k from 1 to 40 in steps of 9, with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33152 
// Sweeps n over 5..7 and k from 1 to 40 in steps of 9, with m = 3 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33169 
// Sweeps n (5..7), k (1..40 step 9) and m (1..3); each configuration runs
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33188 
// Sweeps n over {8, 12} and k from 1 to 40 in steps of 9, with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33204 
// Sweeps n over {8, 12} and k from 1 to 40 in steps of 9, with m = 3 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33221 
// Sweeps n ({8, 12}), k (1..40 step 9) and m (1..3); each configuration runs
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33240 
// Sweeps k from 1 to 40 in steps of 9 on a 3x4 problem with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33255 
// Sweeps k (1..40 step 9), n (1..4) and m (1..3) with ks(3); each
// configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33275 
// Sweeps n over 5..7 and k from 1 to 40 in steps of 9, with m = 3 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33292 
// Sweeps n over {8, 12} and k from 1 to 40 in steps of 9, with m = 3 and
// ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33309 
// Sweeps k (1..40 step 9), n (1..4) and m (1..3) with cm_stride(7); each
// configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33329 
// Sweeps k from 1 to 40 in steps of 9 on a 3x4 problem with ks(3) and
// a_offset(127).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33345 
// For each k (1..40 step 9), tries zero_index 0, 1 and 2 on a 3x4 problem
// with ks(3) and a_offset(127).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33364 
// Single 3x4 run at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
33377 
// Single 3x4 run at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
33390 
// Single 3x4 run at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
33403 
// Sweeps k from 1 to 40 in steps of 9 on a 3x4 problem with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33418 
// Sweeps k from 1 to 40 in steps of 9 on a 3x4 problem with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33433 
// Sweeps k from 1 to 40 in steps of 9 on a 3x4 problem with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33449 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33450 
33451 
33452 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 3x4 run at k = 8 on the 3x4c2s4 wasmsimd dot16x2 ld64 kernel (sr = 4).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
33464 
// Single 3x4 run at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
33477 
// At k = 8, sweeps n (1..4) and m (1..3); each configuration runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33494 
// At k = 8 and n = 4, sweeps m over 1..3 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33509 
// At k = 8 and m = 3, sweeps n over 1..4 with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33524 
// Sweeps k over 1..7 on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33538 
// Sweeps k (1..7), n (1..4) and m (1..3); each configuration runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33557 
// Sweeps k over 9..15 on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33571 
// Sweeps k (9..15), n (1..4) and m (1..3); each configuration runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33590 
// Sweeps k over the multiples of 8 from 16 to 80 on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
33604 
// Sweeps k (16..80 step 8), n (1..4) and m (1..3); each configuration runs
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33623 
// Sweeps n over 5..7 and k from 1 to 40 in steps of 9, with m = 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33639 
// Sweeps n over 5..7 and k from 1 to 40 in steps of 9, with m = 3 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33656 
// Sweeps n (5..7), k (1..40 step 9) and m (1..3); each configuration runs
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33675 
// n_div_4: m = 3, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33691 
// n_div_4_strided_cn: m = 3, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33708 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1..3, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33727 
// small_kernel: m = 3, n = 4, k in {1, 10, 19, 28, 37}, ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33742 
// small_kernel_subtile: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1..3, ks(3), iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33762 
// n_gt_4_small_kernel: m = 3, n = 5..7, k in {1, 10, 19, 28, 37}, ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33779 
// n_div_4_small_kernel: m = 3, n in {8, 12}, k in {1, 10, 19, 28, 37}, ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33796 
// strided_cm_subtile: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1..3, cm_stride(7), iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33816 
// a_offset: m = 3, n = 4, k in {1, 10, 19, 28, 37}, ks(3), a_offset(127).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33832 
// zero: m = 3, n = 4, k in {1, 10, 19, 28, 37}, ks(3), a_offset(127), zero_index = 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33851 
// qmin: single configuration m = 3, n = 4, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33864 
// qmax: single configuration m = 3, n = 4, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33877 
// strided_cm: single configuration m = 3, n = 4, k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33890 
// no_a_zero_point: m = 3, n = 4, k in {1, 10, 19, 28, 37}, a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33905 
// no_b_zero_point: m = 3, n = 4, k in {1, 10, 19, 28, 37}, b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33920 
// no_zero_point: m = 3, n = 4, k in {1, 10, 19, 28, 37}, a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33936 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33937 
33938 
33939 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// k_eq_8: single configuration m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33951 
// strided_cn: single configuration m = 4, n = 4, k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
33964 
// k_eq_8_subtile: k = 8, n = 1..4, m = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33981 
// k_eq_8_subtile_m: n = 4, k = 8, m = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
33996 
// k_eq_8_subtile_n: m = 4, k = 8, n = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34011 
// k_lt_8: m = 4, n = 4, k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34025 
// k_lt_8_subtile: k = 1..7, n = 1..4, m = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34044 
// k_gt_8: m = 4, n = 4, k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34058 
// k_gt_8_subtile: k = 9..15, n = 1..4, m = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34077 
// k_div_8: m = 4, n = 4, k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34091 
// k_div_8_subtile: k = 16, 24, ..., 80, n = 1..4, m = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34110 
// n_gt_4: m = 4, n = 5..7, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34126 
// n_gt_4_strided_cn: m = 4, n = 5..7, k in {1, 10, 19, 28, 37}, cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34143 
// n_gt_4_subtile: n = 5..7, k in {1, 10, 19, 28, 37}, m = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34162 
// n_div_4: m = 4, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34178 
// n_div_4_strided_cn: m = 4, n in {8, 12}, k in {1, 10, 19, 28, 37}, cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34195 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34214 
// small_kernel: m = 4, n = 4, k in {1, 10, 19, 28, 37}, ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34229 
// small_kernel_subtile: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1..4, ks(3), iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34249 
// n_gt_4_small_kernel: m = 4, n = 5..7, k in {1, 10, 19, 28, 37}, ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34266 
// n_div_4_small_kernel: m = 4, n in {8, 12}, k in {1, 10, 19, 28, 37}, ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34283 
// strided_cm_subtile: k in {1, 10, 19, 28, 37}, n = 1..4, m = 1..4, cm_stride(7), iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34303 
// a_offset: m = 4, n = 4, k in {1, 10, 19, 28, 37}, ks(3), a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34319 
// zero: m = 4, n = 4, k in {1, 10, 19, 28, 37}, ks(3), a_offset(163), zero_index = 0..3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34338 
// qmin: single configuration m = 4, n = 4, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34351 
// qmax: single configuration m = 4, n = 4, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34364 
// strided_cm: single configuration m = 4, n = 4, k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34377 
// no_a_zero_point: m = 4, n = 4, k in {1, 10, 19, 28, 37}, a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34392 
// no_b_zero_point: m = 4, n = 4, k in {1, 10, 19, 28, 37}, b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34407 
// no_zero_point: m = 4, n = 4, k in {1, 10, 19, 28, 37}, a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
34423 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34424 
34425 
34426 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// k_eq_8: single configuration m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34438 
// strided_cn: single configuration m = 4, n = 4, k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
34451 
// k_eq_8_subtile: k = 8, n = 1..4, m = 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 5; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34468 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // m swept over [1, 4] with n = 4 and k = 8; iterations set to 1.
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34483 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // n swept over [1, 4] with m = 4 and k = 8; iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34498 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // k swept over 1..7 with m = 4 and n = 4.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34512 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // k swept over 1..7; for each k, every (n, m) pair in [1, 4] x [1, 4]; iterations set to 1.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34531 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // k swept over 9..15 with m = 4 and n = 4.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34545 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // k swept over 9..15; for each k, every (n, m) pair in [1, 4] x [1, 4]; iterations set to 1.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34564 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // k takes the multiples of 8 from 16 through 80, with m = 4 and n = 4.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34578 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // k takes the multiples of 8 from 16 through 80; for each k, every (n, m)
  // pair in [1, 4] x [1, 4]; iterations set to 1.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34597 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // n swept over 5..7; for each n, k steps from 1 to at most 40 in increments of 9; m = 4.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34613 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // n swept over 5..7; for each n, k steps from 1 to at most 40 in increments of 9;
  // m = 4 and cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34630 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n swept over 5..7, k stepping from 1 to at most 40 by 9, and m over [1, 4];
  // iterations set to 1.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34649 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n takes 8 and 12; for each n, k steps from 1 to at most 40 in increments of 9; m = 4.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34665 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // n takes 8 and 12; for each n, k steps from 1 to at most 40 in increments of 9;
  // m = 4 and cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34682 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n takes 8 and 12, k steps from 1 to at most 40 by 9, and m sweeps [1, 4];
  // iterations set to 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34701 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // k steps from 1 to at most 40 in increments of 9; m = 4, n = 4, ks set to 3.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34716 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // k steps from 1 to at most 40 by 9; for each k, every (n, m) pair in
  // [1, 4] x [1, 4]; ks set to 3 and iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34736 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // n swept over 5..7; for each n, k steps from 1 to at most 40 by 9; m = 4, ks set to 3.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34753 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // n takes 8 and 12; for each n, k steps from 1 to at most 40 by 9; m = 4, ks set to 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34770 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // k steps from 1 to at most 40 by 9; for each k, every (n, m) pair in
  // [1, 4] x [1, 4]; cm_stride set to 7 and iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34790 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  // k steps from 1 to at most 40 by 9; m = 4, n = 4, ks set to 3, a_offset set to 163.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34806 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  // k steps from 1 to at most 40 by 9; for each k, zero_index sweeps 0..3.
  // Fixed: m = 4, n = 4, ks = 3, a_offset = 163.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34825 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single run: m = 4, n = 4, k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34838 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single run: m = 4, n = 4, k = 8 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34851 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single run: m = 4, n = 4, k = 8 with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34864 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  // k steps from 1 to at most 40 by 9; m = 4, n = 4, a_zero_point set to 0.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34879 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  // k steps from 1 to at most 40 by 9; m = 4, n = 4, b_zero_point set to 0.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34894 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  // k steps from 1 to at most 40 by 9; m = 4, n = 4, both a_zero_point and
  // b_zero_point set to 0.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34910 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34911 
34912 
34913 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run: m = 4, n = 4, k = 8, no other overrides.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34925 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run: m = 4, n = 4, k = 8 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
34938 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // Every (n, m) pair with n in [1, 4] and m in [1, 4] at k = 8; iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34955 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // m swept over [1, 4] with n = 4 and k = 8; iterations set to 1.
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34970 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // n swept over [1, 4] with m = 4 and k = 8; iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34985 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // k swept over 1..7 with m = 4 and n = 4.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
34999 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k swept over 1..7; for each k, every (n, m) pair in [1, 4] x [1, 4]; iterations set to 1.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35018 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // k swept over 9..15 with m = 4 and n = 4.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35032 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k swept over 9..15; for each k, every (n, m) pair in [1, 4] x [1, 4]; iterations set to 1.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35051 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // k takes the multiples of 8 from 16 through 80, with m = 4 and n = 4.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35065 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k takes the multiples of 8 from 16 through 80; for each k, every (n, m)
  // pair in [1, 4] x [1, 4]; iterations set to 1.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35084 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n swept over 5..7; for each n, k steps from 1 to at most 40 in increments of 9; m = 4.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35100 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n swept over 5..7; for each n, k steps from 1 to at most 40 in increments of 9;
  // m = 4 and cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35117 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n swept over 5..7, k stepping from 1 to at most 40 by 9, and m over [1, 4];
  // iterations set to 1.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35136 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n takes 8 and 12; for each n, k steps from 1 to at most 40 in increments of 9; m = 4.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35152 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // n takes 8 and 12; for each n, k steps from 1 to at most 40 in increments of 9;
  // m = 4 and cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35169 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n takes 8 and 12, k steps from 1 to at most 40 by 9, and m sweeps [1, 4];
  // iterations set to 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35188 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // k steps from 1 to at most 40 in increments of 9; m = 4, n = 4, ks set to 3.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
35203 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // k steps from 1 to at most 40 by 9; for each k, every (n, m) pair in
  // [1, 4] x [1, 4]; ks set to 3 and iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35223 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // n swept over 5..7; for each n, k steps from 1 to at most 40 by 9; m = 4, ks set to 3.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35240 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // n takes 8 and 12; for each n, k steps from 1 to at most 40 by 9; m = 4, ks set to 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
35257 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // k steps from 1 to at most 40 by 9; for each k, every (n, m) pair in
  // [1, 4] x [1, 4]; cm_stride set to 7 and iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
35277 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with ks set to 3 and
// a_offset set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35293 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..3 on a 4x4 tile
// with ks set to 3 and a_offset set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35312 
// Single 4x4 tile with k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35325 
// Single 4x4 tile with k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35338 
// Single 4x4 tile with k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35351 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35366 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35381 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35397 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35398 
35399 
35400 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 4x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35412 
// Single 4x4 tile with k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35425 
// Every (m, n) pair in 1..4 x 1..4 with k = 8 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35442 
// Sweeps m over 1..4 with n = 4, k = 8 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35457 
// Sweeps n over 1..4 with m = 4, k = 8 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35472 
// Sweeps k over 1..7 on a 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35486 
// Sweeps k over 1..7 and every (m, n) pair in 1..4 x 1..4 with iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35505 
// Sweeps k over 9..15 on a 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35519 
// Sweeps k over 9..15 and every (m, n) pair in 1..4 x 1..4 with iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35538 
// Sweeps k over 16, 24, ..., 80 (step 8) on a 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35552 
// Sweeps k over 16, 24, ..., 80 (step 8) and every (m, n) pair in
// 1..4 x 1..4 with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35571 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35587 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 4 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35604 
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..4 with
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35623 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35639 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 4 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35656 
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..4 with
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35675 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35690 
// Sweeps k over {1, 10, 19, 28, 37} and every (m, n) pair in 1..4 x 1..4 with
// ks set to 3 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35710 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 4 and
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35727 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m fixed at 4 and
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35744 
// Sweeps k over {1, 10, 19, 28, 37} and every (m, n) pair in 1..4 x 1..4 with
// cm_stride set to 7 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35764 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with ks set to 3 and
// a_offset set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35780 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..3 on a 4x4 tile
// with ks set to 3 and a_offset set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35799 
// Single 4x4 tile with k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35812 
// Single 4x4 tile with k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35825 
// Single 4x4 tile with k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35838 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35853 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35868 
// Sweeps k over {1, 10, 19, 28, 37} on a 4x4 tile with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35884 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35885 
35886 
35887 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 4x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35899 
// Single 4x4 tile with k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
35912 
// Every (m, n) pair in 1..4 x 1..4 with k = 8 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
35929 
// Sweeps m over 1..4 with n = 4, k = 8 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35944 
// Sweeps n over 1..4 with m = 4, k = 8 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35959 
// Sweeps k over 1..7 on a 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
35973 
// Sweeps k over 1..7 and every (m, n) pair in 1..4 x 1..4 with iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35992 
// Sweeps k over 9..15 on a 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
36006 
// Sweeps k over 9..15 and every (m, n) pair in 1..4 x 1..4 with iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36025 
// Sweeps k over 16, 24, ..., 80 (step 8) on a 4x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
36039 
// Sweeps k over 16, 24, ..., 80 (step 8) and every (m, n) pair in
// 1..4 x 1..4 with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36058 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
36074 
// n = 5, 6, 7 (above nr = 4) with m = 4, k = 1, 10, 19, 28, 37 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
36091 
// n = 5, 6, 7 (above nr = 4) crossed with every m in [1, 4] and
// k = 1, 10, 19, 28, 37; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36110 
// n = 8 and 12 (multiples of nr = 4 above one tile) with m = 4 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
36126 
// n = 8 and 12 (multiples of nr = 4) with m = 4, k = 1, 10, 19, 28, 37 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
36143 
// n = 8 and 12 (multiples of nr = 4) crossed with every m in [1, 4] and
// k = 1, 10, 19, 28, 37; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36162 
// Full 4x4 tile with ks(3) and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
36177 
// ks(3) with every m in [1, 4] and n in [1, 4] for k = 1, 10, 19, 28, 37;
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36197 
// ks(3) with n = 5, 6, 7 (above nr = 4), m = 4 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
36214 
// ks(3) with n = 8 and 12 (multiples of nr = 4), m = 4 and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
36231 
// cm_stride(7) with every m in [1, 4] and n in [1, 4] for
// k = 1, 10, 19, 28, 37; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36251 
// Full 4x4 tile with ks(3) and a_offset(163) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
36267 
// Full 4x4 tile with ks(3) and a_offset(163); zero_index takes each value
// mz = 0..3 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
36286 
// Full 4x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
36299 
// Full 4x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
36312 
// Full 4x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
36325 
// Full 4x4 tile with a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
36340 
// Full 4x4 tile with b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
36355 
// Full 4x4 tile with both a_zero_point(0) and b_zero_point(0) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
36371 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
36372 
36373 
36374 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 1x2 tile (m = mr, n = nr) at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36386 
// Full 1x2 tile at k = 1 with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36399 
// k = 1 with n = 1, 2 and m = 1 (the m loop has a single value because
// mr = 1); each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36416 
// k = 1, n = 2, sweeping m over [1, mr]; with mr = 1 that is the single
// value m = 1. Run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36431 
// k = 1, m = 1, sweeping n = 1, 2; each configuration is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36446 
// Full 1x2 tile for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36460 
// Every k in [2, 9] crossed with n = 1, 2 and m = 1 (single value because
// mr = 1); each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36479 
// n = 3 (the only value in [3, 4), i.e. above nr = 2) with m = 1 and
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36495 
// n = 3 (above nr = 2) with m = 1, k = 1, 3, 5 and cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36512 
// n = 3 (above nr = 2) crossed with k = 1, 3, 5 and m = 1 (single value
// because mr = 1); each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36531 
// n = 4 and 6 (multiples of nr = 2 above one tile) with m = 1 and
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36547 
// n = 4 and 6 (multiples of nr = 2) with m = 1, k = 1, 3, 5 and
// cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36564 
// n = 4 and 6 (multiples of nr = 2) crossed with k = 1, 3, 5 and m = 1
// (single value because mr = 1); each configuration is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36583 
// Full 1x2 tile with ks(3) and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36598 
// ks(3) with k = 1, 3, 5 crossed with n = 1, 2 and m = 1 (single value
// because mr = 1); each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36618 
// ks(3) with n = 3 (above nr = 2), m = 1 and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36635 
// ks(3) with n = 4 and 6 (multiples of nr = 2), m = 1 and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36652 
// cm_stride(5) with k = 1, 3, 5 crossed with n = 1, 2 and m = 1 (single
// value because mr = 1); each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36672 
// Full 1x2 tile with ks(3) and a_offset(7) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36688 
// Full 1x2 tile with ks(3) and a_offset(7); zero_index takes mz = 0 (the
// only value below mr = 1) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36707 
// Full 1x2 tile at k = 1 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36720 
// Full 1x2 tile at k = 1 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36733 
// Full 1x2 tile at k = 1 with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36746 
// Full 1x2 tile with a_zero_point(0) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36761 
// Full 1x2 tile with b_zero_point(0) for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36776 
// Full 1x2 tile with both a_zero_point(0) and b_zero_point(0) for
// k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36792 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
36793 
36794 
36795 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 1x4 tile (m = mr, n = nr) at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36807 
// Full 1x4 tile at k = 1 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36820 
// k = 1 with every n in [1, 4] and m = 1 (the m loop has a single value
// because mr = 1); each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36837 
// k = 1, n = 4, sweeping m over [1, mr]; with mr = 1 that is the single
// value m = 1. Run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36852 
// k = 1, m = 1, sweeping every n in [1, 4]; each configuration is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36867 
// Full 1x4 tile for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36881 
// Every k in [2, 9] crossed with every n in [1, 4] and m = 1 (single value
// because mr = 1); each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36900 
// Runs the 1x4 wasm_fmagic micro-kernel for n in [5, 7] (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36916 
// Runs the 1x4 wasm_fmagic micro-kernel for n in [5, 7] (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36933 
// Runs the 1x4 wasm_fmagic micro-kernel for n in [5, 7], k in {1, 3, 5} and
// m in [1, 1] (n outermost, m innermost), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
36952 
// Runs the 1x4 wasm_fmagic micro-kernel for n in {8, 12} (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36968 
// Runs the 1x4 wasm_fmagic micro-kernel for n in {8, 12} (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
36985 
// Runs the 1x4 wasm_fmagic micro-kernel for n in {8, 12}, k in {1, 3, 5} and
// m in [1, 1] (n outermost, m innermost), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37004 
// Runs the 1x4 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 1, n = 4
// and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37019 
// Runs the 1x4 wasm_fmagic micro-kernel for k in {1, 3, 5}, n in [1, 4] and
// m in [1, 1] (k outermost, m innermost) with ks set to 3, one iteration per
// configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37039 
// Runs the 1x4 wasm_fmagic micro-kernel for n in [5, 7] (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37056 
// Runs the 1x4 wasm_fmagic micro-kernel for n in {8, 12} (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val < 13; n_val += 4) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37073 
// Runs the 1x4 wasm_fmagic micro-kernel for k in {1, 3, 5}, n in [1, 4] and
// m in [1, 1] (k outermost, m innermost) with cm_stride set to 7, one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37093 
// Runs the 1x4 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 1, n = 4,
// ks set to 3 and a_offset set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, a_offset) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37109 
// Runs the 1x4 wasm_fmagic micro-kernel for k in {1, 3, 5} (outer loop) and
// zero_index in [0, 0] (inner loop) with m = 1, n = 4, ks set to 3 and
// a_offset set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, zero) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 0; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(k_val)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37128 
// Single configuration of the 1x4 wasm_fmagic micro-kernel: m = 1, n = 4,
// k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37141 
// Single configuration of the 1x4 wasm_fmagic micro-kernel: m = 1, n = 4,
// k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37154 
// Single configuration of the 1x4 wasm_fmagic micro-kernel: m = 1, n = 4,
// k = 1 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37167 
// Runs the 1x4 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 1, n = 4
// and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37182 
// Runs the 1x4 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 1, n = 4
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37197 
// Runs the 1x4 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 1, n = 4
// and both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37213 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37214 
37215 
37216 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration of the 2x2 wasm_fmagic micro-kernel: m = 2, n = 2,
// k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37228 
// Single configuration of the 2x2 wasm_fmagic micro-kernel: m = 2, n = 2,
// k = 1 with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37241 
// Runs the 2x2 wasm_fmagic micro-kernel for n in [1, 2] (outer loop) and
// m in [1, 2] (inner loop) with k = 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val < 3; ++n_val) {
    for (uint32_t m_val = 1; m_val < 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37258 
// Runs the 2x2 wasm_fmagic micro-kernel for m in [1, 2] with n = 2 and k = 1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val < 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(m_val).n(2).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37273 
// Runs the 2x2 wasm_fmagic micro-kernel for n in [1, 2] with m = 2 and k = 1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val < 3; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37288 
// Runs the 2x2 wasm_fmagic micro-kernel for every k in [2, 9] with m = 2 and
// n = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37302 
// Runs the 2x2 wasm_fmagic micro-kernel for k in [2, 9], n in [1, 2] and
// m in [1, 2] (k outermost, m innermost), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val < 3; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37321 
// Runs the 2x2 wasm_fmagic micro-kernel for n in [3, 3] (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37337 
// Runs the 2x2 wasm_fmagic micro-kernel for n in [3, 3] (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 2 and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37354 
// Runs the 2x2 wasm_fmagic micro-kernel for n in [3, 3], k in {1, 3, 5} and
// m in [1, 2] (n outermost, m innermost), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37373 
// Runs the 2x2 wasm_fmagic micro-kernel for n in {4, 6} (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37389 
// Runs the 2x2 wasm_fmagic micro-kernel for n in {4, 6} (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 2 and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(5)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37406 
// Runs the 2x2 wasm_fmagic micro-kernel for n in {4, 6}, k in {1, 3, 5} and
// m in [1, 2] (n outermost, m innermost), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37425 
// Runs the 2x2 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 2, n = 2
// and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37440 
// Runs the 2x2 wasm_fmagic micro-kernel for k in {1, 3, 5}, n in [1, 2] and
// m in [1, 2] (k outermost, m innermost) with ks set to 3, one iteration per
// configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 3; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37460 
// Runs the 2x2 wasm_fmagic micro-kernel for n in [3, 3] (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 2 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val <= 3; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37477 
// Runs the 2x2 wasm_fmagic micro-kernel for n in {4, 6} (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 2 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val < 7; n_val += 2) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37494 
// Runs the 2x2 wasm_fmagic micro-kernel for k in {1, 3, 5}, n in [1, 2] and
// m in [1, 2] (k outermost, m innermost) with cm_stride set to 5, one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t n_val = 1; n_val < 3; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(2).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37514 
// Runs the 2x2 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 2, n = 2,
// ks set to 3 and a_offset set to 13.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, a_offset) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .ks(3)
        .a_offset(13)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37530 
// Runs the 2x2 wasm_fmagic micro-kernel for k in {1, 3, 5} (outer loop) and
// zero_index in [0, 1] (inner loop) with m = 2, n = 2, ks set to 3 and
// a_offset set to 13.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, zero) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(2).n(2).k(k_val)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37549 
// Single configuration of the 2x2 wasm_fmagic micro-kernel: m = 2, n = 2,
// k = 1 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37562 
// Single configuration of the 2x2 wasm_fmagic micro-kernel: m = 2, n = 2,
// k = 1 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37575 
// Single configuration of the 2x2 wasm_fmagic micro-kernel: m = 2, n = 2,
// k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37588 
// Runs the 2x2 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 2, n = 2
// and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37603 
// Runs the 2x2 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 2, n = 2
// and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37618 
// Runs the 2x2 wasm_fmagic micro-kernel for k in {1, 3, 5} with m = 2, n = 2
// and both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_zero_point) {
  for (size_t k_val = 1; k_val < 6; k_val += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37634 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37635 
37636 
37637 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration of the 2x4 wasm_fmagic micro-kernel: m = 2, n = 4,
// k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37649 
// Single configuration of the 2x4 wasm_fmagic micro-kernel: m = 2, n = 4,
// k = 1 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
}
37662 
// Runs the 2x4 wasm_fmagic micro-kernel for n in [1, 4] (outer loop) and
// m in [1, 2] (inner loop) with k = 1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    for (uint32_t m_val = 1; m_val < 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37679 
// Runs the 2x4 wasm_fmagic micro-kernel for m in [1, 2] with n = 4 and k = 1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val < 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_val).n(4).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37694 
// Runs the 2x4 wasm_fmagic micro-kernel for n in [1, 4] with m = 2 and k = 1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val < 5; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37709 
// Runs the 2x4 wasm_fmagic micro-kernel for every k in [2, 9] with m = 2 and
// n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
37723 
// Runs the 2x4 wasm_fmagic micro-kernel for k in [2, 9], n in [1, 4] and
// m in [1, 2] (k outermost, m innermost), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val < 5; ++n_val) {
      for (uint32_t m_val = 1; m_val < 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                  xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
37742 
// Runs the 2x4 wasm_fmagic micro-kernel for n in [5, 7] (outer loop) and
// k in {1, 3, 5} (inner loop) with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val < 6; k_val += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
37758 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  // n sweeps 5..7 and k takes 1, 3, 5, all with cn_stride set to 7.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37775 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
  // n in 5..7, k in {1, 3, 5}, m in 1..2; one iteration per combination.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37794 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
  // n takes the multiples of 4 in 8..12; k takes 1, 3, 5; m stays at 2.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37810 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
  // n in {8, 12} and k in {1, 3, 5}, all with cn_stride set to 7.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37827 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 3, 5}, m in 1..2; one iteration per combination.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37846 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel) {
  // m = 2, n = 4, ks = 3; k takes the values 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37861 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel_subtile) {
  // ks = 3 with k in {1, 3, 5}, n in 1..4, m in 1..2; one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37881 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  // ks = 3 with n in 5..7 and k in {1, 3, 5}; m stays at 2.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37898 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_small_kernel) {
  // ks = 3 with n in {8, 12} and k in {1, 3, 5}; m stays at 2.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37915 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
  // cm_stride = 7 with k in {1, 3, 5}, n in 1..4, m in 1..2; one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37935 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, a_offset) {
  // ks = 3 and a_offset = 13; k takes the values 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(13);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37951 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, zero) {
  // ks = 3 and a_offset = 13; zero_index sweeps 0..1 for each k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(13).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37970 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
  // Single run with m = 2, n = 4, k = 1 and qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37983 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
  // Single run with m = 2, n = 4, k = 1 and qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37996 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
  // Single run with m = 2, n = 4, k = 1 and cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38009 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_a_zero_point) {
  // a_zero_point is set to 0; k takes the values 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38024 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_b_zero_point) {
  // b_zero_point is set to 0; k takes the values 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38039 
TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_zero_point) {
  // Both a_zero_point and b_zero_point are set to 0; k takes 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38055 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38056 
38057 
38058 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
  // Single run with m = 4, n = 2 and k = 1.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38070 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
  // Single run with m = 4, n = 2, k = 1 and cn_stride set to 5.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38083 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
  // k is fixed at 1; n sweeps 1..2 and m sweeps 1..4, one iteration each.
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38100 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  // k is fixed at 1 and n at 2; m sweeps 1..4 with a single iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(mc).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38115 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  // k is fixed at 1 and m at 4; n sweeps 1..2 with a single iteration each.
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38130 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
  // m = 4 and n = 2 stay fixed while k sweeps 2..9.
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38144 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
  // Every (k, n, m) with k in 2..9, n in 1..2, m in 1..4; one iteration each.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
38163 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
  // The n loop covers only n = 3 (3 <= n < 4); k takes 1, 3, 5; m stays at 4.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38179 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  // n = 3 only (3 <= n < 4) and k in {1, 3, 5}, all with cn_stride set to 5.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38196 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
  // n = 3 only (3 <= n < 4), k in {1, 3, 5}, m in 1..4; one iteration each.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
38215 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
  // n takes the multiples of 2 in 4..6; k takes 1, 3, 5; m stays at 4.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38231 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
  // n in {4, 6} and k in {1, 3, 5}, all with cn_stride set to 5.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38248 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5}, m in 1..4; one iteration per combination.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
38267 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel) {
  // m = 4, n = 2, ks = 3; k takes the values 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38282 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel_subtile) {
  // ks = 3 with k in {1, 3, 5}, n in 1..2, m in 1..4; one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
38302 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  // ks = 3 with n = 3 only (3 <= n < 4) and k in {1, 3, 5}; m stays at 4.
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38319 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_small_kernel) {
  // ks = 3 with n in {4, 6} and k in {1, 3, 5}; m stays at 4.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38336 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
  // cm_stride = 5 with k in {1, 3, 5}, n in 1..2, m in 1..4; one iteration each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
38356 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, a_offset) {
  // ks = 3 and a_offset = 23; k takes the values 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38372 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, zero) {
  // ks = 3 and a_offset = 23; zero_index sweeps 0..3 for each k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(2).k(kc);
      tester.ks(3).a_offset(23).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38391 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
  // Single run with m = 4, n = 2, k = 1 and qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38404 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
  // Single run with m = 4, n = 2, k = 1 and qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38417 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
  // Single run with m = 4, n = 2, k = 1 and cm_stride set to 5.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38430 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_a_zero_point) {
  // a_zero_point is set to 0; k takes the values 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38445 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_b_zero_point) {
  // b_zero_point is set to 0; k takes the values 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38460 
TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_zero_point) {
  // Both a_zero_point and b_zero_point are set to 0; k takes 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38476 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38477 
38478 
38479 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
  // Single run with m = 4, n = 4 and k = 1.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38491 
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
  // Single run with m = 4, n = 4, k = 1 and cn_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
38504 
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
  // k is fixed at 1; n sweeps 1..4 and m sweeps 1..4, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38521 
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  // k is fixed at 1 and n at 4; m sweeps 1..4 with a single iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(mc).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38536 
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  // k is fixed at 1 and m at 4; n sweeps 1..4 with a single iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38551 
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
  // m = 4 and n = 4 stay fixed while k sweeps 2..9.
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
38565 
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
  // Every (k, n, m) with k in 2..9, n in 1..4, m in 1..4; one iteration each.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
38584 
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
  // n sweeps 5..7 (above nr = 4); k takes the values 1, 3, 5; m stays at 4.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38600 
// Same sweep as n_gt_4 (n in [5, 7], k in {1, 3, 5}, m=4) but with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38617 
// Runs the 4x4 WASM fmagic IGEMM microkernel for n in [5, 7], k in {1, 3, 5}
// and every m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38636 
// Runs the 4x4 WASM fmagic IGEMM microkernel with m=4 for n in {8, 12}
// (multiples of nr=4) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38652 
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 3, 5}, m=4) but with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38669 
// Runs the 4x4 WASM fmagic IGEMM microkernel for n in {8, 12}, k in {1, 3, 5}
// and every m in [1, 4], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38688 
// Runs the 4x4 WASM fmagic IGEMM microkernel with m=4, n=4, ks(3) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38703 
// Runs the 4x4 WASM fmagic IGEMM microkernel with ks(3) for k in {1, 3, 5} over
// every (m, n) pair with 1 <= m <= 4 and 1 <= n <= 4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38723 
// Runs the 4x4 WASM fmagic IGEMM microkernel with m=4, ks(3) for n in [5, 7]
// and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38740 
// Runs the 4x4 WASM fmagic IGEMM microkernel with m=4, ks(3) for n in {8, 12}
// and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38757 
// Runs the 4x4 WASM fmagic IGEMM microkernel with cm_stride(7) for k in {1, 3, 5}
// over every (m, n) pair with 1 <= m <= 4 and 1 <= n <= 4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
38777 
// Runs the 4x4 WASM fmagic IGEMM microkernel with m=4, n=4, ks(3) and
// a_offset(23) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38793 
// Runs the 4x4 WASM fmagic IGEMM microkernel with ks(3) and a_offset(23) for
// k in {1, 3, 5}, setting zero_index to each mz in [0, 3].
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38812 
// Runs the 4x4 WASM fmagic IGEMM microkernel once (m=4, n=4, k=1) with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38825 
// Runs the 4x4 WASM fmagic IGEMM microkernel once (m=4, n=4, k=1) with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38838 
// Runs the 4x4 WASM fmagic IGEMM microkernel once (m=4, n=4, k=1) with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38851 
// Runs the 4x4 WASM fmagic IGEMM microkernel with m=4, n=4 and a_zero_point(0)
// for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38866 
// Runs the 4x4 WASM fmagic IGEMM microkernel with m=4, n=4 and b_zero_point(0)
// for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38881 
// Runs the 4x4 WASM fmagic IGEMM microkernel with m=4, n=4 and both
// a_zero_point(0) and b_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38897 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38898 
38899 
// Runs the 1x2 scalar fmagic IGEMM microkernel once with m=1, n=2, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38911 
// Runs the 1x2 scalar fmagic IGEMM microkernel once (m=1, n=2, k=1) with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
38924 
// Runs the 1x2 scalar fmagic IGEMM microkernel with k=1 for n in [1, 2] and
// m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
38941 
// Runs the 1x2 scalar fmagic IGEMM microkernel with n=2, k=1 for m in [1, 1],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38956 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, k=1 for n in [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38971 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, n=2 for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
38985 
// Runs the 1x2 scalar fmagic IGEMM microkernel for k in [2, 9], n in [1, 2]
// and m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39004 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1 for n in [3, 3]
// (n larger than nr=2) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39020 
// Same sweep as n_gt_2 (n in [3, 3], k in {1, 3, 5}, m=1) but with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39037 
// Runs the 1x2 scalar fmagic IGEMM microkernel for n in [3, 3], k in {1, 3, 5}
// and m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39056 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1 for n in {4, 6}
// (multiples of nr=2) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39072 
// Same sweep as n_div_2 (n in {4, 6}, k in {1, 3, 5}, m=1) but with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39089 
// Runs the 1x2 scalar fmagic IGEMM microkernel for n in {4, 6}, k in {1, 3, 5}
// and m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39108 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, n=2, ks(3) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39123 
// Runs the 1x2 scalar fmagic IGEMM microkernel with ks(3) for k in {1, 3, 5},
// n in [1, 2] and m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39143 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, ks(3) for n in [3, 3]
// and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39160 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, ks(3) for n in {4, 6}
// and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39177 
// Runs the 1x2 scalar fmagic IGEMM microkernel with cm_stride(5) for
// k in {1, 3, 5}, n in [1, 2] and m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39197 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, n=2, ks(3) and
// a_offset(7) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39213 
// Runs the 1x2 scalar fmagic IGEMM microkernel with ks(3) and a_offset(7) for
// k in {1, 3, 5}, setting zero_index to each mz in [0, 0].
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39232 
// Runs the 1x2 scalar fmagic IGEMM microkernel once (m=1, n=2, k=1) with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
39245 
// Runs the 1x2 scalar fmagic IGEMM microkernel once (m=1, n=2, k=1) with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
39258 
// Runs the 1x2 scalar fmagic IGEMM microkernel once (m=1, n=2, k=1) with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
39271 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, n=2 and
// a_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39286 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, n=2 and
// b_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39301 
// Runs the 1x2 scalar fmagic IGEMM microkernel with m=1, n=2 and both
// a_zero_point(0) and b_zero_point(0) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39317 
39318 
// Runs the 1x2 scalar lrintf IGEMM microkernel once with m=1, n=2, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
39330 
// Runs the 1x2 scalar lrintf IGEMM microkernel once (m=1, n=2, k=1) with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
39343 
// Runs the 1x2 scalar lrintf IGEMM microkernel with k=1 for n in [1, 2] and
// m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
39360 
// Runs the 1x2 scalar lrintf IGEMM microkernel with n=2, k=1 for m in [1, 1],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
39375 
// Runs the 1x2 scalar lrintf IGEMM microkernel with m=1, k=1 for n in [1, 2],
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
39390 
// Runs the 1x2 scalar lrintf IGEMM microkernel with m=1, n=2 for every k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
39404 
// Runs the 1x2 scalar lrintf IGEMM microkernel for k in [2, 9], n in [1, 2]
// and m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39423 
// Runs the 1x2 scalar lrintf IGEMM microkernel with m=1 for n in [3, 3]
// (n larger than nr=2) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
39439 
// Same sweep as n_gt_2 (n in [3, 3], k in {1, 3, 5}, m=1) but with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
39456 
// Runs the 1x2 scalar lrintf IGEMM microkernel for n in [3, 3], k in {1, 3, 5}
// and m in [1, 1], one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39475 
// Runs the 1x2 scalar_lrintf ukernel with m = 1, n in {4, 6} (multiples of
// nr = 2) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
39491 
// Runs the 1x2 scalar_lrintf ukernel with m = 1, n in {4, 6}, k in {1, 3, 5}
// and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
39508 
// Runs the 1x2 scalar_lrintf ukernel with n in {4, 6}, k in {1, 3, 5} and
// m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39527 
// Runs the 1x2 scalar_lrintf ukernel on a full 1x2 tile with ks = 3 and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
39542 
// Runs the 1x2 scalar_lrintf ukernel with ks = 3 over k in {1, 3, 5},
// n in {1, 2} and m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39562 
// Runs the 1x2 scalar_lrintf ukernel with ks = 3, m = 1, n = 3 and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
39579 
// Runs the 1x2 scalar_lrintf ukernel with ks = 3, m = 1, n in {4, 6} and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
39596 
// Runs the 1x2 scalar_lrintf ukernel with cm_stride set to 5 over
// k in {1, 3, 5}, n in {1, 2} and m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39616 
// Runs the 1x2 scalar_lrintf ukernel on a full 1x2 tile with ks = 3 and
// a_offset = 7 for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
39632 
// Runs the 1x2 scalar_lrintf ukernel with ks = 3, a_offset = 7 and
// zero_index = 0 (the only value the mz loop visits) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
39651 
// Runs the 1x2 scalar_lrintf ukernel on a 1x2 tile with k = 1 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
39664 
// Runs the 1x2 scalar_lrintf ukernel on a 1x2 tile with k = 1 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
39677 
// Runs the 1x2 scalar_lrintf ukernel on a 1x2 tile with k = 1 and cm_stride
// set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
39690 
// Runs the 1x2 scalar_lrintf ukernel on a 1x2 tile with a_zero_point = 0 for
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
39705 
// Runs the 1x2 scalar_lrintf ukernel on a 1x2 tile with b_zero_point = 0 for
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
39720 
// Runs the 1x2 scalar_lrintf ukernel on a 1x2 tile with both a_zero_point and
// b_zero_point set to 0 for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
39736 
39737 
// Runs the 1x4 scalar_fmagic ukernel on a full 1x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
39749 
// Runs the 1x4 scalar_fmagic ukernel on a full 1x4 tile with k = 1 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
39762 
// Runs the 1x4 scalar_fmagic ukernel with k = 1 over n in [1, 4] and m = 1
// (the only value the m loop visits), using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39779 
// Runs the 1x4 scalar_fmagic ukernel with k = 1, n = 4 and m = 1 (the only
// value the m loop visits), using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39794 
// Runs the 1x4 scalar_fmagic ukernel with k = 1, m = 1 and n in [1, 4],
// using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39809 
// Runs the 1x4 scalar_fmagic ukernel on a full 1x4 tile for k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39823 
// Runs the 1x4 scalar_fmagic ukernel over k in [2, 9], n in [1, 4] and m = 1,
// using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39842 
// Runs the 1x4 scalar_fmagic ukernel with m = 1, n in [5, 7] and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39858 
// Runs the 1x4 scalar_fmagic ukernel with m = 1, n in [5, 7], k in {1, 3, 5}
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39875 
// Runs the 1x4 scalar_fmagic ukernel over n in [5, 7], k in {1, 3, 5} and
// m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39894 
// Runs the 1x4 scalar_fmagic ukernel with m = 1, n in {8, 12} (multiples of
// nr = 4) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39910 
// Runs the 1x4 scalar_fmagic ukernel with m = 1, n in {8, 12}, k in {1, 3, 5}
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39927 
// Runs the 1x4 scalar_fmagic ukernel over n in {8, 12}, k in {1, 3, 5} and
// m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39946 
// Runs the 1x4 scalar_fmagic ukernel on a full 1x4 tile with ks = 3 and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
39961 
// Runs the 1x4 scalar_fmagic ukernel with ks = 3 over k in {1, 3, 5},
// n in [1, 4] and m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
39981 
// Runs the 1x4 scalar_fmagic ukernel with ks = 3, m = 1, n in [5, 7] and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
39998 
// Runs the 1x4 scalar_fmagic ukernel with ks = 3, m = 1, n in {8, 12} and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
40015 
// Runs the 1x4 scalar_fmagic ukernel with cm_stride set to 7 over
// k in {1, 3, 5}, n in [1, 4] and m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
40035 
// Runs the 1x4 scalar_fmagic ukernel on a full 1x4 tile with ks = 3 and
// a_offset = 7 for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
40051 
// Runs the 1x4 scalar_fmagic ukernel with ks = 3, a_offset = 7 and
// zero_index = 0 (the only value the mz loop visits) for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
40070 
// Runs the 1x4 scalar_fmagic ukernel on a 1x4 tile with k = 1 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
40083 
// Runs the 1x4 scalar_fmagic ukernel on a 1x4 tile with k = 1 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
40096 
// Runs the 1x4 scalar_fmagic ukernel on a 1x4 tile with k = 1 and cm_stride
// set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
40109 
// Runs the 1x4 scalar_fmagic ukernel on a 1x4 tile with a_zero_point = 0 for
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
40124 
// Runs the 1x4 scalar_fmagic ukernel on a 1x4 tile with b_zero_point = 0 for
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
40139 
// Runs the 1x4 scalar_fmagic ukernel on a 1x4 tile with both a_zero_point and
// b_zero_point set to 0 for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
40155 
40156 
// Runs the 1x4 scalar_lrintf ukernel on a full 1x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
40168 
// Runs the 1x4 scalar_lrintf ukernel on a full 1x4 tile with k = 1 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
40181 
// Runs the 1x4 scalar_lrintf ukernel with k = 1 over n in [1, 4] and m = 1
// (the only value the m loop visits), using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
40198 
// Runs the 1x4 scalar_lrintf ukernel with k = 1, n = 4 and m = 1 (the only
// value the m loop visits), using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
40213 
// Runs the 1x4 scalar_lrintf ukernel with k = 1, m = 1 and n in [1, 4],
// using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
40228 
// Runs the 1x4 scalar_lrintf ukernel on a full 1x4 tile for k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
40242 
// Runs the 1x4 scalar_lrintf ukernel over k in [2, 9], n in [1, 4] and m = 1,
// using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
40261 
// Runs the 1x4 scalar_lrintf ukernel with m = 1, n in [5, 7] and
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
40277 
// Runs the 1x4 scalar_lrintf ukernel with m = 1, n in [5, 7], k in {1, 3, 5}
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
40294 
// Runs the 1x4 scalar_lrintf ukernel over n in [5, 7], k in {1, 3, 5} and
// m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
40313 
// Runs the 1x4 scalar_lrintf ukernel with m = 1, n in {8, 12} (multiples of
// nr = 4) and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
40329 
// Runs the 1x4 scalar_lrintf ukernel with m = 1, n in {8, 12}, k in {1, 3, 5}
// and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
40346 
// Runs the 1x4 scalar_lrintf ukernel over n in {8, 12}, k in {1, 3, 5} and
// m = 1, using a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
40365 
// 1x4 scalar lrintf kernel: full 1x4 tile, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40380 
// 1x4 scalar lrintf kernel: k in {1, 3, 5}, n in 1..4, m in 1..1, with ks(3)
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40400 
// 1x4 scalar lrintf kernel: n in {5, 6, 7}, k in {1, 3, 5}, m fixed at 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40417 
// 1x4 scalar lrintf kernel: n in {8, 12}, k in {1, 3, 5}, m fixed at 1, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40434 
// 1x4 scalar lrintf kernel: k in {1, 3, 5}, n in 1..4, m in 1..1, with cm_stride(7)
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40454 
// 1x4 scalar lrintf kernel: full 1x4 tile, k in {1, 3, 5}, with ks(3) and a_offset(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_val)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40470 
// 1x4 scalar lrintf kernel: full 1x4 tile, k in {1, 3, 5}, with ks(3), a_offset(7)
// and zero_index swept over 0..0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(7)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40489 
// 1x4 scalar lrintf kernel: single 1x4 tile at k = 1 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
40502 
// 1x4 scalar lrintf kernel: single 1x4 tile at k = 1 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
40515 
// 1x4 scalar lrintf kernel: single 1x4 tile at k = 1 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
40528 
// 1x4 scalar lrintf kernel: full 1x4 tile, k in {1, 3, 5}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_val)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40543 
// 1x4 scalar lrintf kernel: full 1x4 tile, k in {1, 3, 5}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_val)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40558 
// 1x4 scalar lrintf kernel: full 1x4 tile, k in {1, 3, 5}, with both a_zero_point(0)
// and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
40574 
40575 
// 2x2 scalar fmagic kernel: single full 2x2 tile at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40587 
// 2x2 scalar fmagic kernel: single full 2x2 tile at k = 1 with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40600 
// 2x2 scalar fmagic kernel: k = 1, n in 1..2, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40617 
// 2x2 scalar fmagic kernel: k = 1, n fixed at 2, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(m_val).n(2).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40632 
// 2x2 scalar fmagic kernel: k = 1, m fixed at 2, n in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40647 
// 2x2 scalar fmagic kernel: full 2x2 tile with k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40661 
// 2x2 scalar fmagic kernel: k in 2..9, n in 1..2, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40680 
// 2x2 scalar fmagic kernel: n = 3 (loop over 3..3), k in {1, 3, 5}, m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40696 
// 2x2 scalar fmagic kernel: n = 3 (loop over 3..3), k in {1, 3, 5}, m fixed at 2,
// with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40713 
// 2x2 scalar fmagic kernel: n = 3 (loop over 3..3), k in {1, 3, 5}, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40732 
// 2x2 scalar fmagic kernel: n in {4, 6} (step 2), k in {1, 3, 5}, m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40748 
// 2x2 scalar fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m fixed at 2, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40765 
// 2x2 scalar fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40784 
// 2x2 scalar fmagic kernel: full 2x2 tile, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40799 
// 2x2 scalar fmagic kernel: k in {1, 3, 5}, n in 1..2, m in 1..2, with ks(3)
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40819 
// 2x2 scalar fmagic kernel: n = 3 (loop over 3..3), k in {1, 3, 5}, m fixed at 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40836 
// 2x2 scalar fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m fixed at 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40853 
// 2x2 scalar fmagic kernel: k in {1, 3, 5}, n in 1..2, m in 1..2, with cm_stride(5)
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
40873 
// 2x2 scalar fmagic kernel: full 2x2 tile, k in {1, 3, 5}, with ks(3) and a_offset(13).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40889 
// 2x2 scalar fmagic kernel: full 2x2 tile, k in {1, 3, 5}, with ks(3), a_offset(13)
// and zero_index swept over 0..1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .ks(3)
        .a_offset(13)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
40908 
// 2x2 scalar fmagic kernel: single full 2x2 tile at k = 1 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40921 
// 2x2 scalar fmagic kernel: single full 2x2 tile at k = 1 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40934 
// 2x2 scalar fmagic kernel: single full 2x2 tile at k = 1 with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
40947 
// 2x2 scalar fmagic kernel: full 2x2 tile, k in {1, 3, 5}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40962 
// 2x2 scalar fmagic kernel: full 2x2 tile, k in {1, 3, 5}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40977 
// 2x2 scalar fmagic kernel: full 2x2 tile, k in {1, 3, 5}, with both a_zero_point(0)
// and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
40993 
40994 
// 2x2 scalar lrintf kernel: single full 2x2 tile at k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
41006 
// 2x2 scalar lrintf kernel: single full 2x2 tile at k = 1 with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
41019 
// 2x2 scalar lrintf kernel: k = 1, n in 1..2, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41036 
// 2x2 scalar lrintf kernel: k = 1, n fixed at 2, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(m_val).n(2).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41051 
// 2x2 scalar lrintf kernel: k = 1, m fixed at 2, n in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41066 
// 2x2 scalar lrintf kernel: full 2x2 tile with k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41080 
// 2x2 scalar lrintf kernel: k in 2..9, n in 1..2, m in 1..2, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41099 
// 2x2 scalar lrintf kernel: n = 3 (loop over 3..3), k in {1, 3, 5}, m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41115 
// 2x2 scalar lrintf kernel: n = 3 (loop over 3..3), k in {1, 3, 5}, m fixed at 2,
// with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41132 
// 2x2 scalar lrintf kernel: n = 3 (loop over 3..3), k in {1, 3, 5}, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41151 
// 2x2 scalar lrintf kernel: n in {4, 6} (step 2), k in {1, 3, 5}, m fixed at 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41167 
// 2x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m fixed at 2, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41184 
// 2x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41203 
// 2x2 scalar lrintf kernel: full 2x2 tile, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
41218 
// 2x2 scalar lrintf kernel: k in {1, 3, 5}, n in 1..2, m in 1..2, with ks(3)
// and one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
41238 
// 2x2 scalar lrintf kernel: n = 3 (loop over 3..3), k in {1, 3, 5}, m fixed at 2, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
41255 
// 2x2 scalar-lrintf kernel: ks(3) with n = 4 and 6 (multiples of nr),
// swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
41272 
// 2x2 scalar-lrintf kernel: cm_stride(5) with every m in [1, 2] and
// n in [1, 2], swept over k = 1, 3, 5; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41292 
// 2x2 scalar-lrintf kernel: full tile with ks(3) and a_offset(13),
// swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41308 
// 2x2 scalar-lrintf kernel: full tile with ks(3), a_offset(13) and
// zero_index(mz) for mz = 0, 1, swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(13)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
41327 
// 2x2 scalar-lrintf kernel: full tile, k = 1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
41340 
// 2x2 scalar-lrintf kernel: full tile, k = 1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
41353 
// 2x2 scalar-lrintf kernel: full tile, k = 1, with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
41366 
// 2x2 scalar-lrintf kernel: full tile with a_zero_point(0),
// swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41381 
// 2x2 scalar-lrintf kernel: full tile with b_zero_point(0),
// swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41396 
// 2x2 scalar-lrintf kernel: full tile with both a_zero_point(0) and
// b_zero_point(0), swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41412 
41413 
// 2x4 scalar-fmagic kernel: full 2x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
41425 
// 2x4 scalar-fmagic kernel: full tile, k = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
41438 
// 2x4 scalar-fmagic kernel: k = 1 with every m in [1, 2] and n in [1, 4];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
41455 
// 2x4 scalar-fmagic kernel: k = 1, n = 4, with m swept over [1, 2];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41470 
// 2x4 scalar-fmagic kernel: k = 1, m = 2, with n swept over [1, 4];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41485 
// 2x4 scalar-fmagic kernel: full tile with k swept over [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41499 
// 2x4 scalar-fmagic kernel: k swept over [2, 9] with every m in [1, 2] and
// n in [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41518 
// 2x4 scalar-fmagic kernel: n swept over [5, 7] (above nr) with m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
41534 
// 2x4 scalar-fmagic kernel: n swept over [5, 7] with cn_stride(7), m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
41551 
// 2x4 scalar-fmagic kernel: n swept over [5, 7], k = 1, 3, 5 and m over
// [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41570 
// 2x4 scalar-fmagic kernel: n = 8 and 12 (multiples of nr) with m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
41586 
// 2x4 scalar-fmagic kernel: n = 8 and 12 with cn_stride(7), m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
41603 
// 2x4 scalar-fmagic kernel: n = 8 and 12, k = 1, 3, 5 and m over [1, 2];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41622 
// 2x4 scalar-fmagic kernel: full 2x4 tile with ks(3), swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41637 
// 2x4 scalar-fmagic kernel: ks(3) with every m in [1, 2] and n in [1, 4],
// swept over k = 1, 3, 5; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41657 
// 2x4 scalar-fmagic kernel: ks(3) with n swept over [5, 7], m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
41674 
// 2x4 scalar-fmagic kernel: ks(3) with n = 8 and 12, m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
41691 
// 2x4 scalar-fmagic kernel: cm_stride(7) with every m in [1, 2] and
// n in [1, 4], swept over k = 1, 3, 5; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41711 
// 2x4 scalar-fmagic kernel: full tile with ks(3) and a_offset(13),
// swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41727 
// 2x4 scalar-fmagic kernel: full tile with ks(3), a_offset(13) and
// zero_index(mz) for mz = 0, 1, swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(13)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
41746 
// 2x4 scalar-fmagic kernel: full tile, k = 1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
41759 
// 2x4 scalar-fmagic kernel: full tile, k = 1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
41772 
// 2x4 scalar-fmagic kernel: full tile, k = 1, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
41785 
// 2x4 scalar-fmagic kernel: full tile with a_zero_point(0),
// swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41800 
// 2x4 scalar-fmagic kernel: full tile with b_zero_point(0),
// swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41815 
// 2x4 scalar-fmagic kernel: full tile with both a_zero_point(0) and
// b_zero_point(0), swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
41831 
41832 
// 2x4 scalar-lrintf kernel: full 2x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
41844 
// 2x4 scalar-lrintf kernel: full tile, k = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
41857 
// 2x4 scalar-lrintf kernel: k = 1 with every m in [1, 2] and n in [1, 4];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
41874 
// 2x4 scalar-lrintf kernel: k = 1, n = 4, with m swept over [1, 2];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41889 
// 2x4 scalar-lrintf kernel: k = 1, m = 2, with n swept over [1, 4];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41904 
// 2x4 scalar-lrintf kernel: full tile with k swept over [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
41918 
// 2x4 scalar-lrintf kernel: k swept over [2, 9] with every m in [1, 2] and
// n in [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41937 
// 2x4 scalar-lrintf kernel: n swept over [5, 7] (above nr) with m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
41953 
// 2x4 scalar-lrintf kernel: n swept over [5, 7] with cn_stride(7), m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
41970 
// 2x4 scalar-lrintf kernel: n swept over [5, 7], k = 1, 3, 5 and m over
// [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
41989 
// 2x4 scalar-lrintf kernel: n = 8 and 12 (multiples of nr) with m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42005 
// 2x4 scalar-lrintf kernel: n = 8 and 12 with cn_stride(7), m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42022 
// 2x4 scalar-lrintf kernel: n = 8 and 12, k = 1, 3, 5 and m over [1, 2];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42041 
// 2x4 scalar-lrintf kernel: full 2x4 tile with ks(3), swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42056 
// 2x4 scalar-lrintf kernel: ks(3) with every m in [1, 2] and n in [1, 4],
// swept over k = 1, 3, 5; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42076 
// 2x4 scalar-lrintf kernel: ks(3) with n swept over [5, 7], m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42093 
// 2x4 scalar-lrintf kernel: ks(3) with n = 8 and 12, m = 2,
// and k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42110 
// 2x4 scalar-lrintf kernel: cm_stride(7) with every m in [1, 2] and
// n in [1, 4], swept over k = 1, 3, 5; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42130 
// 2x4 scalar-lrintf kernel: full tile with ks(3) and a_offset(13),
// swept over k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42146 
// 2x4 scalar-lrintf kernel: m=2, n=4, k in {1, 3, 5}, ks=3, a_offset=13, zero_index swept over 0..1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(13)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
42165 
// 2x4 scalar-lrintf kernel: m=2, n=4, k=1, qmin=128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
42178 
// 2x4 scalar-lrintf kernel: m=2, n=4, k=1, qmax=128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
42191 
// 2x4 scalar-lrintf kernel: m=2, n=4, k=1, cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(2)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
42204 
// 2x4 scalar-lrintf kernel: m=2, n=4, k in {1, 3, 5}, a_zero_point=0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42219 
// 2x4 scalar-lrintf kernel: m=2, n=4, k in {1, 3, 5}, b_zero_point=0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42234 
// 2x4 scalar-lrintf kernel: m=2, n=4, k in {1, 3, 5}, a_zero_point=0 and b_zero_point=0.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
42250 
42251 
// 3x2 scalar-imagic kernel: m=3 (=mr), n=2 (=nr), k=1.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42263 
// 3x2 scalar-imagic kernel: m=3, n=2, k=1, cn_stride=5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42276 
// 3x2 scalar-imagic kernel: every m in 1..3 and n in 1..2, k=1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42293 
// 3x2 scalar-imagic kernel: m in 1..3, n=2, k=1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42308 
// 3x2 scalar-imagic kernel: m=3, n in 1..2, k=1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42323 
// 3x2 scalar-imagic kernel: m=3, n=2, k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42337 
// 3x2 scalar-imagic kernel: k in 2..9, every m in 1..3 and n in 1..2, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42356 
// 3x2 scalar-imagic kernel: m=3, n=3 (the only value in [3, 4)), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42372 
// 3x2 scalar-imagic kernel: m=3, n=3 (the only value in [3, 4)), k in {1, 3, 5}, cn_stride=5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42389 
// 3x2 scalar-imagic kernel: n=3 (the only value in [3, 4)), k in {1, 3, 5}, m in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42408 
// 3x2 scalar-imagic kernel: m=3, n in {4, 6} (multiples of nr=2), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42424 
// 3x2 scalar-imagic kernel: m=3, n in {4, 6} (multiples of nr=2), k in {1, 3, 5}, cn_stride=5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42441 
// 3x2 scalar-imagic kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42460 
// 3x2 scalar-imagic kernel: m=3, n=2, k in {1, 3, 5}, ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42475 
// 3x2 scalar-imagic kernel: k in {1, 3, 5}, every m in 1..3 and n in 1..2, ks=3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42495 
// 3x2 scalar-imagic kernel: m=3, n=3 (the only value in [3, 4)), k in {1, 3, 5}, ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42512 
// 3x2 scalar-imagic kernel: m=3, n in {4, 6} (multiples of nr=2), k in {1, 3, 5}, ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42529 
// 3x2 scalar-imagic kernel: k in {1, 3, 5}, every m in 1..3 and n in 1..2, cm_stride=5, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42549 
// 3x2 scalar-imagic kernel: m=3, n=2, k in {1, 3, 5}, ks=3, a_offset=17.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42565 
// 3x2 scalar-imagic kernel: m=3, n=2, k in {1, 3, 5}, ks=3, a_offset=17, zero_index swept over 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42584 
// 3x2 scalar-imagic kernel: m=3, n=2, k=1, qmin=128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42597 
// 3x2 scalar-imagic kernel: m=3, n=2, k=1, qmax=128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42610 
// 3x2 scalar-imagic kernel: m=3, n=2, k=1, cm_stride=5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42623 
// 3x2 scalar-imagic kernel: m=3, n=2, k in {1, 3, 5}, a_zero_point=0.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42638 
// 3x2 scalar-imagic kernel: m=3, n=2, k in {1, 3, 5}, b_zero_point=0.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42653 
// 3x2 scalar-imagic kernel: m=3, n=2, k in {1, 3, 5}, a_zero_point=0 and b_zero_point=0.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42669 
42670 
// 3x4 scalar-imagic kernel: m=3 (=mr), n=4 (=nr), k=1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42682 
// 3x4 scalar-imagic kernel: m=3, n=4, k=1, cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
42695 
// 3x4 scalar-imagic kernel: every m in 1..3 and n in 1..4, k=1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42712 
// 3x4 scalar-imagic kernel: m in 1..3, n=4, k=1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42727 
// 3x4 scalar-imagic kernel: m=3, n in 1..4, k=1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42742 
// 3x4 scalar-imagic kernel: m=3, n=4, k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42756 
// 3x4 scalar-imagic kernel: k in 2..9, every m in 1..3 and n in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42775 
// 3x4 scalar-imagic kernel: m=3, n in 5..7 (above nr=4), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42791 
// 3x4 scalar-imagic kernel: m=3, n in 5..7 (above nr=4), k in {1, 3, 5}, cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42808 
// 3x4 scalar-imagic kernel: n in 5..7, k in {1, 3, 5}, m in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42827 
// 3x4 scalar-imagic kernel: m=3, n in {8, 12} (multiples of nr=4), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42843 
// 3x4 scalar-imagic kernel: m=3, n in {8, 12} (multiples of nr=4), k in {1, 3, 5}, cn_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42860 
// 3x4 scalar-imagic kernel: n in {8, 12}, k in {1, 3, 5}, m in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42879 
// 3x4 scalar-imagic kernel: m=3, n=4, k in {1, 3, 5}, ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42894 
// 3x4 scalar-imagic kernel: k in {1, 3, 5}, every m in 1..3 and n in 1..4, ks=3, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42914 
// 3x4 scalar-imagic kernel: m=3, n in 5..7 (above nr=4), k in {1, 3, 5}, ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42931 
// 3x4 scalar-imagic kernel: m=3, n in {8, 12} (multiples of nr=4), k in {1, 3, 5}, ks=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
42948 
// 3x4 scalar-imagic kernel: k in {1, 3, 5}, every m in 1..3 and n in 1..4, cm_stride=7, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
42968 
// 3x4 scalar-imagic kernel: m=3, n=4, k in {1, 3, 5}, ks=3, a_offset=17.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
42984 
// 3x4 scalar-imagic kernel: m=3, n=4, k in {1, 3, 5}, ks=3, a_offset=17, zero_index swept over 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
43003 
// 3x4 scalar-imagic kernel: m=3, n=4, k=1, qmin=128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
43016 
// 3x4 scalar-imagic kernel: m=3, n=4, k=1, qmax=128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
43029 
// 3x4 scalar-imagic kernel: m=3, n=4, k=1, cm_stride=7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
43042 
// 3x4 tile with a_zero_point forced to 0, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43057 
// 3x4 tile with b_zero_point forced to 0, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43072 
// 3x4 tile with both a_zero_point and b_zero_point forced to 0, for k in
// {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43088 
43089 
// 4x2 tile (m = 4, n = 2) at k = 1; no other tester options are set.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43101 
// 4x2 tile at k = 1 with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43114 
// k = 1 over every (m, n) with m in [1, 4] and n in [1, 2]; one iteration
// per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43131 
// k = 1, n = 2, with m swept over [1, 4]; one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(mc).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43146 
// k = 1, m = 4, with n swept over [1, 2]; one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43161 
// 4x2 tile with k swept over [2, 10).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43175 
// k in [2, 10) crossed with every (m, n), m in [1, 4] and n in [1, 2]; one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43194 
// n in [3, 4) (i.e. n = 3) with m = 4, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43210 
// n in [3, 4) with m = 4 and cn_stride = 5, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43227 
// n in [3, 4), k in {1, 3, 5}, m swept over [1, 4]; one iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43246 
// n in {4, 6} with m = 4, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43262 
// n in {4, 6} with m = 4 and cn_stride = 5, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43279 
// n in {4, 6}, k in {1, 3, 5}, m swept over [1, 4]; one iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43298 
// 4x2 tile with ks = 3, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43313 
// ks = 3 with k in {1, 3, 5} crossed with every (m, n), m in [1, 4] and n in
// [1, 2]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43333 
// n in [3, 4) with m = 4 and ks = 3, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43350 
// n in {4, 6} with m = 4 and ks = 3, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43367 
// cm_stride = 5 with k in {1, 3, 5} crossed with every (m, n), m in [1, 4]
// and n in [1, 2]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43387 
// 4x2 tile with ks = 3 and a_offset = 23, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43403 
// 4x2 tile, ks = 3, a_offset = 23: sweeps k over {1, 3, 5} and passes each
// zero_index in [0, 4) to the tester.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(2).k(kc);
      tester.ks(3).a_offset(23).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43422 
// 4x2 tile at k = 1 with the tester's qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43435 
// 4x2 tile at k = 1 with the tester's qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43448 
// 4x2 tile at k = 1 with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43461 
// 4x2 tile with a_zero_point forced to 0, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43476 
// 4x2 tile with b_zero_point forced to 0, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43491 
// 4x2 tile with both a_zero_point and b_zero_point forced to 0, for k in
// {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43507 
43508 
// 4x4 tile (m = 4, n = 4) at k = 1; no other tester options are set.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43520 
// 4x4 tile at k = 1 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43533 
// k = 1 over every (m, n) with m in [1, 4] and n in [1, 4]; one iteration
// per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43550 
// k = 1, n = 4, with m swept over [1, 4]; one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(mc).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43565 
// k = 1, m = 4, with n swept over [1, 4]; one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43580 
// 4x4 tile with k swept over [2, 10).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43594 
// k in [2, 10) crossed with every (m, n), m in [1, 4] and n in [1, 4]; one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43613 
// n in [5, 8) with m = 4, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43629 
// n in [5, 8) with m = 4 and cn_stride = 7, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43646 
// n in [5, 8), k in {1, 3, 5}, m swept over [1, 4]; one iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43665 
// n in {8, 12} with m = 4, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43681 
// n in {8, 12} with m = 4 and cn_stride = 7, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43698 
// n in {8, 12}, k in {1, 3, 5}, m swept over [1, 4]; one iteration per
// combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43717 
// 4x4 tile with ks = 3, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43732 
// ks = 3 with k in {1, 3, 5} crossed with every (m, n), m in [1, 4] and n in
// [1, 4]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43752 
// n in [5, 8) with m = 4 and ks = 3, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43769 
// n in {8, 12} with m = 4 and ks = 3, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43786 
// cm_stride = 7 with k in {1, 3, 5} crossed with every (m, n), m in [1, 4]
// and n in [1, 4]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
43806 
// 4x4 tile with ks = 3 and a_offset = 23, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43822 
// 4x4 tile, ks = 3, a_offset = 23: sweeps k over {1, 3, 5} and passes each
// zero_index in [0, 4) to the tester.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(kc);
      tester.ks(3).a_offset(23).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
43841 
// 4x4 tile at k = 1 with the tester's qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43854 
// 4x4 tile at k = 1 with the tester's qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43867 
// 4x4 tile at k = 1 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
43880 
// 4x4 tile with a_zero_point forced to 0, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43895 
// 4x4 tile with b_zero_point forced to 0, for k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43910 
// 4x4 tile with both a_zero_point and b_zero_point forced to 0, for k in
// {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
43926