xref: /aosp_15_r20/external/XNNPACK/test/qu8-igemm-minmax-fp32-2.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-igemm-minmax-fp32.yaml
//   Generator: tools/generate-gemm-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20 
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25 
26 
27 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 4, n = 16 (equal to mr and nr) with k = 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
40 
// Same m = 4, n = 16, k = 16 configuration, but with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
54 
// Sweeps every n in [1, 16] and m in [1, 4] at k = 16, one iteration per
// (m, n) pair.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
72 
// Sweeps m in [1, 4] with n fixed at 16 and k = 16, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(16)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
88 
// Sweeps n in [1, 16] with m fixed at 4 and k = 16, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
104 
// Sweeps k over 1..15 (all values below 16) with m = 4, n = 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
119 
// Sweeps k over 1..15 combined with every n in [1, 16] and m in [1, 4],
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
139 
// Sweeps k over 17..31 (between 16 and 32, exclusive) with m = 4, n = 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
154 
// Sweeps k over 17..31 combined with every n in [1, 16] and m in [1, 4],
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
174 
// Sweeps k over multiples of 16 from 32 to 160 with m = 4, n = 16.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
189 
// Sweeps k over multiples of 16 from 32 to 160 combined with every n in
// [1, 16] and m in [1, 4], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
209 
// Sweeps n over 17..31 (above nr = 16, below 32) and k over 1..80 in
// steps of 17, with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
226 
// Sweeps n over 17..31 and k over 1..80 in steps of 17 with m = 4 and
// cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
244 
// Sweeps n over 17..31, k over 1..80 in steps of 17, and m in [1, 4],
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
264 
// Sweeps n over the multiples of 16 {32, 48} and k over 1..80 in steps
// of 17, with m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
281 
// Sweeps n over {32, 48} and k over 1..80 in steps of 17 with m = 4 and
// cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
299 
// Sweeps n over {32, 48}, k over 1..80 in steps of 17, and m in [1, 4],
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
319 
// Sweeps k over 1..80 in steps of 17 with m = 4, n = 16 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
335 
// Sweeps k over 1..80 in steps of 17, n in [1, 16] and m in [1, 4] with
// ks set to 3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
356 
// Sweeps n over 17..31 and k over 1..80 in steps of 17 with m = 4 and
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
374 
// Sweeps n over {32, 48} and k over 1..80 in steps of 17 with m = 4 and
// ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
392 
// Sweeps k over 1..80 in steps of 17, n in [1, 16] and m in [1, 4] with
// cm_stride set to 19, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
413 
// Sweeps k over 1..80 in steps of 17 with m = 4, n = 16, ks set to 3 and
// a_offset set to 331.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
430 
// Sweeps k over 1..80 in steps of 17 and zero_index over 0..3, with
// m = 4, n = 16, ks set to 3 and a_offset set to 331.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
450 
// Single m = 4, n = 16, k = 16 configuration with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
464 
// Single m = 4, n = 16, k = 16 configuration with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
478 
// Single m = 4, n = 16, k = 16 configuration with cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
492 
// Sweeps k over 1..80 in steps of 17 with m = 4, n = 16 and
// a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
508 
// Sweeps k over 1..80 in steps of 17 with m = 4, n = 16 and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
524 
// Sweeps k over 1..80 in steps of 17 with m = 4, n = 16 and both
// a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
541 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
542 
543 
544 #if XNN_ARCH_ARM
// Single configuration: m = 1, n = 2 (equal to mr and nr) with k = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(1)
    .n(2)
    .k(4)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
557 
// Same m = 1, n = 2, k = 4 configuration, but with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(1)
    .n(2)
    .k(4)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
571 
// Sweeps n in [1, 2] and m in [1, 1] at k = 4, one iteration per
// (m, n) pair.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
589 
// Sweeps m in [1, 1] with n fixed at 2 and k = 4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(m)
      .n(2)
      .k(4)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
605 
// Sweeps n in [1, 2] with m fixed at 1 and k = 4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
621 
// Sweeps k over 1..3 (all values below 4) with m = 1, n = 2.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
636 
// Sweeps k over 1..3 combined with n in [1, 2] and m in [1, 1], one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
656 
// Sweeps k over 5..7 (between 4 and 8, exclusive) with m = 1, n = 2.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
671 
// Sweeps k over 5..7 combined with n in [1, 2] and m in [1, 1], one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
691 
// Sweeps k over multiples of 4 from 8 to 40 with m = 1, n = 2.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
706 
// Sweeps k over multiples of 4 from 8 to 40 combined with n in [1, 2]
// and m in [1, 1], one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
726 
// Runs n = 3 (the only value in [3, 4)) against k over 1..20 in steps
// of 5, with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
743 
// Runs n = 3 against k over 1..20 in steps of 5 with m = 1 and
// cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
761 
// Runs n = 3 against k over 1..20 in steps of 5 and m in [1, 1], one
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
781 
// Sweeps n over the multiples of 2 {4, 6} and k over 1..20 in steps of
// 5, with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
798 
// Sweeps n over {4, 6} and k over 1..20 in steps of 5 with m = 1 and
// cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
816 
// Sweeps n over {4, 6}, k over 1..20 in steps of 5, and m in [1, 1],
// one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
836 
// Sweeps k over 1..20 in steps of 5 with m = 1, n = 2 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
852 
// Sweeps k over 1..20 in steps of 5, n in [1, 2] and m in [1, 1] with
// ks set to 3, one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
873 
// 1x2c4 armsimd32: N = 3 (the only value in [3, 4)), ks = 3, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 3; cols < 4; cols++) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(4).sr(1);
      tester.m(1).n(cols).k(depth).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
891 
// 1x2c4 armsimd32: N in {4, 6}, ks = 3, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(4).sr(1);
      tester.m(1).n(cols).k(depth).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
909 
// 1x2c4 armsimd32: cm_stride = 5 with K in {1, 6, 11, 16}, N in [1, 2], M = 1; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 2; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(5).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
930 
// 1x2c4 armsimd32: full tile with ks = 3 and a_offset = 23, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(4).sr(1);
    tester.m(1).n(2).k(depth).ks(3).a_offset(23);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
947 
// 1x2c4 armsimd32: ks = 3, a_offset = 23, zero_index = 0 (the only value below 1), K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_row = 0; zero_row < 1; zero_row++) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(4).sr(1);
      tester.m(1).n(2).k(depth).ks(3).a_offset(23).zero_index(zero_row);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
967 
// 1x2c4 armsimd32: full tile, K = 4, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(4).sr(1);
  tester.m(1).n(2).k(4).qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
981 
// 1x2c4 armsimd32: full tile, K = 4, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(4).sr(1);
  tester.m(1).n(2).k(4).qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
995 
// 1x2c4 armsimd32: full tile, K = 4, with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(4).sr(1);
  tester.m(1).n(2).k(4).cm_stride(5);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
1009 
// 1x2c4 armsimd32: full tile with a_zero_point = 0, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(4).sr(1);
    tester.m(1).n(2).k(depth).a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1025 
// 1x2c4 armsimd32: full tile with b_zero_point = 0, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(4).sr(1);
    tester.m(1).n(2).k(depth).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1041 
// 1x2c4 armsimd32: full tile with both a_zero_point and b_zero_point = 0, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(4).sr(1);
    tester.m(1).n(2).k(depth).a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1058 #endif  // XNN_ARCH_ARM
1059 
1060 
1061 #if XNN_ARCH_ARM
// 2x2c4 armsimd32: full 2x2 tile with K = 4.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(4).sr(1);
  tester.m(2).n(2).k(4);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
1074 
// 2x2c4 armsimd32: full tile, K = 4, with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(4).sr(1);
  tester.m(2).n(2).k(4).cn_stride(5);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
1088 
// 2x2c4 armsimd32: K = 4 with every (M, N) in [1, 2] x [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 1; cols <= 2; cols++) {
    for (uint32_t rows = 1; rows <= 2; rows++) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(4).sr(1);
      tester.m(rows).n(cols).k(4).iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1106 
// 2x2c4 armsimd32: K = 4, N = 2, M in [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t rows = 1; rows <= 2; rows++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(rows).n(2).k(4).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1122 
// 2x2c4 armsimd32: K = 4, M = 2, N in [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 1; cols <= 2; cols++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(cols).k(4).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1138 
// 2x2c4 armsimd32: full tile with K in [1, 3].
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth < 4; depth++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(2).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1153 
// 2x2c4 armsimd32: K in [1, 3] with every (M, N) in [1, 2] x [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth < 4; depth++) {
    for (uint32_t cols = 1; cols <= 2; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1173 
// 2x2c4 armsimd32: full tile with K in [5, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 5; depth < 8; depth++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(2).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1188 
// 2x2c4 armsimd32: K in [5, 7] with every (M, N) in [1, 2] x [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 5; depth < 8; depth++) {
    for (uint32_t cols = 1; cols <= 2; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1208 
// 2x2c4 armsimd32: full tile with K = 8, 12, ..., 40 (step 4).
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(2).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1223 
// 2x2c4 armsimd32: K = 8, 12, ..., 40 with every (M, N) in [1, 2] x [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 2; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1243 
// 2x2c4 armsimd32: M = 2, N = 3 (the only value in [3, 4)), K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 3; cols < 4; cols++) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(4).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1260 
// 2x2c4 armsimd32: M = 2, N = 3, cn_stride = 5, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 3; cols < 4; cols++) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(4).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(5);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1278 
// 2x2c4 armsimd32: N = 3, K in {1, 6, 11, 16}, M in [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 3; cols < 4; cols++) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1298 
// 2x2c4 armsimd32: M = 2, N in {4, 6}, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(4).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1315 
// 2x2c4 armsimd32: M = 2, N in {4, 6}, cn_stride = 5, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(4).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(5);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1333 
// 2x2c4 armsimd32: N in {4, 6}, K in {1, 6, 11, 16}, M in [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1353 
// 2x2c4 armsimd32: full 2x2 tile with ks = 3, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(2).k(depth).ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1369 
// 2x2c4 armsimd32: ks = 3, K in {1, 6, 11, 16}, every (M, N) in [1, 2] x [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 2; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1390 
// 2x2c4 armsimd32: M = 2, N = 3, ks = 3, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 3; cols < 4; cols++) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(4).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1408 
// 2x2c4 armsimd32: M = 2, N in {4, 6}, ks = 3, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(4).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1426 
// 2x2c4 armsimd32: cm_stride = 5, K in {1, 6, 11, 16}, every (M, N) in [1, 2] x [1, 2]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 2; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(5).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1447 
// 2x2c4 armsimd32: full tile with ks = 3 and a_offset = 43, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(2).k(depth).ks(3).a_offset(43);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1464 
// 2x2c4 armsimd32: ks = 3, a_offset = 43, zero_index in {0, 1}, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_row = 0; zero_row < 2; zero_row++) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(4).sr(1);
      tester.m(2).n(2).k(depth).ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
    }
  }
}
1484 
// 2x2c4 armsimd32: full tile, K = 4, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(4).sr(1);
  tester.m(2).n(2).k(4).qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
1498 
// 2x2c4 armsimd32: full tile, K = 4, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(4).sr(1);
  tester.m(2).n(2).k(4).qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
1512 
// 2x2c4 armsimd32: full tile, K = 4, with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(4).sr(1);
  tester.m(2).n(2).k(4).cm_stride(5);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
}
1526 
// 2x2c4 armsimd32: full tile with a_zero_point = 0, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, no_a_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(2).k(depth).a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1542 
// 2x2c4 armsimd32: full tile with b_zero_point = 0, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, no_b_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(2).k(depth).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1558 
// 2x2c4 armsimd32: full tile with both a_zero_point and b_zero_point = 0, K in {1, 6, 11, 16}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, no_zero_point) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(4).sr(1);
    tester.m(2).n(2).k(depth).a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qu8_conv_minmax_fp32_armsimd32_params, xnn_qu8_requantize_fp32);
  }
}
1575 #endif  // XNN_ARCH_ARM
1576 
1577 
1578 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8 neon_mlal_lane: full 1x8 tile with K = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
1591 
// 1x8 neon_mlal_lane: full tile, K = 8, with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8).cn_stride(11);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
1605 
// 1x8 neon_mlal_lane: K = 8, N in [1, 8], M = 1; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; cols++) {
    for (uint32_t rows = 1; rows <= 1; rows++) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
1623 
// 1x8 neon_mlal_lane: K = 8, N = 8, M = 1 (the loop covers only that value); a single iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; rows++) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1639 
// 1x8 neon_mlal_lane: K = 8, M = 1, N in [1, 8]; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; cols++) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(cols).k(8).iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1655 
// 1x8 neon_mlal_lane: full tile with K in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; depth++) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1670 
// 1x8 neon_mlal_lane: K in [1, 7], N in [1, 8], M = 1; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; depth++) {
    for (uint32_t cols = 1; cols <= 8; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1690 
// 1x8 neon_mlal_lane: full tile with K in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; depth++) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1705 
// 1x8 neon_mlal_lane: K in [9, 15], N in [1, 8], M = 1; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; depth++) {
    for (uint32_t cols = 1; cols <= 8; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1725 
// 1x8 neon_mlal_lane: full tile with K = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1740 
// 1x8 neon_mlal_lane: K = 16, 24, ..., 80, N in [1, 8], M = 1; a single iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; cols++) {
      for (uint32_t rows = 1; rows <= 1; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1760 
// For every n in [9, 15], runs the kernel at m=1 with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
1777 
// Same n in [9, 15] / k = 1, 10, ..., 37 sweep as n_gt_8, with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
1795 
// For every n in [9, 15] and k = 1, 10, ..., 37, sweeps m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1815 
// For n = 16 and 24, runs the kernel at m=1 with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
1832 
// Same n = 16, 24 / k = 1, 10, ..., 37 sweep as n_div_8, with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
1850 
// For n = 16, 24 and k = 1, 10, ..., 37, sweeps m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1870 
// Runs the kernel at m=1, n=8 with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1886 
// With ks(3): for k = 1, 10, ..., 37, sweeps n over [1, 8] and m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1907 
// With ks(3): for every n in [9, 15], runs the kernel at m=1 with k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
1925 
// With ks(3): for n = 16 and 24, runs the kernel at m=1 with k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
1943 
// With cm_stride(11): for k = 1, 10, ..., 37, sweeps n over [1, 8] and
// m over [1, 1], running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
1964 
// Runs the kernel at m=1, n=8 with ks(3) and a_offset(43) for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1981 
// With ks(3) and a_offset(43): for k = 1, 10, ..., 37, sets zero_index to
// each value in [0, 1) (i.e. only 0) and runs the kernel at m=1, n=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2001 
// Single run at m=1, n=8, k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2015 
// Single run at m=1, n=8, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2029 
// Single run at m=1, n=8, k=8 with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2043 
// Runs the kernel at m=1, n=8 with a_zero_point(0) for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2059 
// Runs the kernel at m=1, n=8 with b_zero_point(0) for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2075 
// Runs the kernel at m=1, n=8 with both a_zero_point(0) and b_zero_point(0)
// for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2092 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2093 
2094 
2095 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 1x16 NEON MLAL-lane kernel at m=1, n=16, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2108 
// Single run at m=1, n=16, k=8 with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2122 
// At k=8, sweeps n over [1, 16] and m over [1, 1], running a single
// iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2140 
// At n=16, k=8, sweeps m over [1, 1] with a single iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2156 
// At m=1, k=8, sweeps n over [1, 16] with a single iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2172 
// Runs the kernel at m=1, n=16 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2187 
// For every k in [1, 7], sweeps n over [1, 16] and m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2207 
// Runs the kernel at m=1, n=16 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2222 
// For every k in [9, 15], sweeps n over [1, 16] and m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2242 
// Runs the kernel at m=1, n=16 for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2257 
// For k = 16, 24, ..., 80, sweeps n over [1, 16] and m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2277 
// For every n in [17, 31], runs the kernel at m=1 with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2294 
// Same n in [17, 31] / k = 1, 10, ..., 37 sweep as n_gt_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2312 
// For every n in [17, 31] and k = 1, 10, ..., 37, sweeps m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2332 
// For n = 32 and 48, runs the kernel at m=1 with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2349 
// Same n = 32, 48 / k = 1, 10, ..., 37 sweep as n_div_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2367 
// For n = 32, 48 and k = 1, 10, ..., 37, sweeps m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2387 
// Runs the kernel at m=1, n=16 with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2403 
// With ks(3): for k = 1, 10, ..., 37, sweeps n over [1, 16] and m over [1, 1],
// running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2424 
// With ks(3): for every n in [17, 31], runs the kernel at m=1 with k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2442 
// With ks(3): for n = 32 and 48, runs the kernel at m=1 with k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2460 
// With cm_stride(19): for k = 1, 10, ..., 37, sweeps n over [1, 16] and
// m over [1, 1], running a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2481 
// Runs the kernel at m=1, n=16 with ks(3) and a_offset(43) for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2498 
// With ks(3) and a_offset(43): for k = 1, 10, ..., 37, sets zero_index to
// each value in [0, 1) (i.e. only 0) and runs the kernel at m=1, n=16.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
2518 
// Single run at m=1, n=16, k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2532 
// Single run at m=1, n=16, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2546 
// Single run at m=1, n=16, k=8 with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
2560 
// Runs the kernel at m=1, n=16 with a_zero_point(0) for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2576 
// Runs the kernel at m=1, n=16 with b_zero_point(0) for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2592 
// Runs the kernel at m=1, n=16 with both a_zero_point(0) and b_zero_point(0)
// for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2609 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2610 
2611 
2612 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single run of the 2x16c4 NEON dot-product kernel at m=2, n=16, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
2625 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, strided_cn) {
  // Single run with m=2, n=16, k=8 and cn_stride(19).
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(4).sr(1);
  tester.m(2).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
2639 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_eq_8_subtile) {
  // k fixed at 8; every (n, m) pair with n in [1, 16] and m in [1, 2],
  // one iteration each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(4).sr(1);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2657 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_eq_8_subtile_m) {
  // k=8 and n=16 fixed; m sweeps 1..2, one iteration each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(m_dim).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2673 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_eq_8_subtile_n) {
  // k=8 and m=2 fixed; n sweeps 1..16, one iteration each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2689 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_lt_8) {
  // m=2, n=16; k sweeps 1..7.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(16).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2704 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_lt_8_subtile) {
  // k in [1, 7], n in [1, 16], m in [1, 2]; one iteration per combination.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2724 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_gt_8) {
  // m=2, n=16; k sweeps 9..15.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(16).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2739 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_gt_8_subtile) {
  // k in [9, 15], n in [1, 16], m in [1, 2]; one iteration per combination.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2759 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_div_8) {
  // m=2, n=16; k takes the multiples of 8 from 16 through 80.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(16).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2774 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, k_div_8_subtile) {
  // k over multiples of 8 in [16, 80], n in [1, 16], m in [1, 2];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2794 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_gt_16) {
  // m=2; n sweeps 17..31 and, for each n, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(4).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2811 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_gt_16_strided_cn) {
  // m=2 with cn_stride(19); n sweeps 17..31, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(4).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2829 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_gt_16_subtile) {
  // n in [17, 31], k in {1, 10, 19, 28, 37}, m in [1, 2];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2849 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_div_16) {
  // m=2; n takes 32 and 48, and for each n, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(4).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2866 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_div_16_strided_cn) {
  // m=2 with cn_stride(19); n takes 32 and 48, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(4).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2884 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_div_16_subtile) {
  // n in {32, 48}, k in {1, 10, 19, 28, 37}, m in [1, 2];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2904 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, small_kernel) {
  // m=2, n=16 with ks(3); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(16).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2920 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, small_kernel_subtile) {
  // ks(3) with k in {1, 10, 19, 28, 37}, n in [1, 16], m in [1, 2];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2941 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_gt_16_small_kernel) {
  // m=2 with ks(3); n sweeps 17..31, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(4).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2959 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, n_div_16_small_kernel) {
  // m=2 with ks(3); n takes 32 and 48, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(4).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
2977 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, strided_cm_subtile) {
  // cm_stride(19) with k in {1, 10, 19, 28, 37}, n in [1, 16], m in [1, 2];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
2998 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, a_offset) {
  // m=2, n=16 with ks(3) and a_offset(83); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(16).k(k_dim);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3015 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, zero) {
  // ks(3) and a_offset(83); for each k in {1, 10, 19, 28, 37} the
  // zero_index is set to 0 and then 1.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(16).kr(4).sr(1);
      tester.m(2).n(16).k(k_dim);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3035 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, qmin) {
  // Single run with m=2, n=16, k=8 and qmin(128).
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(4).sr(1);
  tester.m(2).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3049 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, qmax) {
  // Single run with m=2, n=16, k=8 and qmax(128).
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(4).sr(1);
  tester.m(2).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3063 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, strided_cm) {
  // Single run with m=2, n=16, k=8 and cm_stride(19).
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(16).kr(4).sr(1);
  tester.m(2).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3077 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, no_a_zero_point) {
  // m=2, n=16 with a_zero_point(0); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(16).k(k_dim);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3093 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, no_b_zero_point) {
  // m=2, n=16 with b_zero_point(0); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(16).k(k_dim);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3109 
TEST(QU8_IGEMM_MINMAX_FP32_2X16C4__NEONDOT, no_zero_point) {
  // m=2, n=16 with a_zero_point(0) and b_zero_point(0);
  // k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(16).kr(4).sr(1);
    tester.m(2).n(16).k(k_dim);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3126 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
3127 
3128 
3129 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8) {
  // Single run of the 4x8 ukernel with m=4, n=8, k=8.
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3142 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cn) {
  // Single run with m=4, n=8, k=8 and cn_stride(11).
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3156 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  // k fixed at 8; every (n, m) pair with n in [1, 8] and m in [1, 4],
  // one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3174 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  // k=8 and n=8 fixed; m sweeps 1..4, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m_dim).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3190 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  // k=8 and m=4 fixed; n sweeps 1..8, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n_dim).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3206 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8) {
  // m=4, n=8; k sweeps 1..7.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3221 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  // k in [1, 7], n in [1, 8], m in [1, 4]; one iteration per combination.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3241 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8) {
  // m=4, n=8; k sweeps 9..15.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3256 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  // k in [9, 15], n in [1, 8], m in [1, 4]; one iteration per combination.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3276 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8) {
  // m=4, n=8; k takes the multiples of 8 from 16 through 80.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3291 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
  // k over multiples of 8 in [16, 80], n in [1, 8], m in [1, 4];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3311 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8) {
  // m=4; n sweeps 9..15 and, for each n, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3328 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  // m=4 with cn_stride(11); n sweeps 9..15, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3346 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  // n in [9, 15], k in {1, 10, 19, 28, 37}, m in [1, 4];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3366 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8) {
  // m=4; n takes 16 and 24, and for each n, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3383 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  // m=4 with cn_stride(11); n takes 16 and 24, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3401 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 10, 19, 28, 37}, m in [1, 4];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3421 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, small_kernel) {
  // m=4, n=8 with ks(3); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3437 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
  // ks(3) with k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 4];
  // one iteration per combination.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3458 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  // m=4 with ks(3); n sweeps 9..15, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3476 
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  // m=4 with ks(3); n takes 16 and 24, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3494 
// Sub-tile sweep with cm_stride(11): k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4,
// running a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3515 
// Full 4x8 tile with ks(3) and a_offset(163), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3532 
// Full 4x8 tile with ks(3) and a_offset(163); zero_index takes each value 0..3
// for every k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
    }
  }
}
3552 
// Single full-tile run (m=4, n=8, k=8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3566 
// Single full-tile run (m=4, n=8, k=8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3580 
// Single full-tile run (m=4, n=8, k=8) with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
}
3594 
// Full 4x8 tile with a_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3610 
// Full 4x8 tile with b_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3626 
// Full 4x8 tile with both a_zero_point(0) and b_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3643 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3644 
3645 
3646 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single full-tile run: m=4, n=16, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3659 
// Single full-tile run (m=4, n=16, k=8) with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
3673 
// k fixed at 8; every sub-tile with n in 1..16 and m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3691 
// k fixed at 8, n fixed at 16; m sweeps 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3707 
// k fixed at 8, m fixed at 4; n sweeps 1..16, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3723 
// Full 4x16 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3738 
// Sub-tile sweep for k in 1..7: n in 1..16, m in 1..4, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3758 
// Full 4x16 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3773 
// Sub-tile sweep for k in 9..15: n in 1..16, m in 1..4, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3793 
// Full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3808 
// Sub-tile sweep for k = 16, 24, ..., 80: n in 1..16, m in 1..4,
// one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3828 
// n larger than nr(16): n in 17..31, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3845 
// n in 17..31 with cn_stride(19); k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3863 
// n in 17..31, k in {1, 10, 19, 28, 37}, m in 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3883 
// n a multiple of nr(16): n in {32, 48}, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3900 
// n in {32, 48} with cn_stride(19); k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3918 
// n in {32, 48}, k in {1, 10, 19, 28, 37}, m in 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3938 
// Full 4x16 tile with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3954 
// Sub-tile sweep with ks(3): k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4,
// running a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
3975 
// ks(3) with n larger than nr(16): n in 17..31, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3993 
// ks(3) with n a multiple of nr(16): n in {32, 48}, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
4011 
// Sub-tile sweep with cm_stride(19): k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4,
// running a single iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4032 
// Full 4x16 tile with ks(3) and a_offset(163), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
4049 
// Full 4x16 tile with ks(3) and a_offset(163); zero_index takes each value 0..3
// for every k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
4069 
// Single full-tile run (m=4, n=16, k=8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
4083 
// Single full-tile run (m=4, n=16, k=8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
4097 
// Single full-tile run (m=4, n=16, k=8) with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
}
4111 
// Full 4x16 tile with a_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
4127 
// Full 4x16 tile with b_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
4143 
// Full 4x16 tile with both a_zero_point(0) and b_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
4160 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
4161 
4162 
4163 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile run: m=1, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4176 
// Single full-tile run (m=1, n=4, k=8) with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4190 
// k fixed at 8; n in 1..4, with the m loop covering only m=1 (mr is 1),
// one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4208 
// k fixed at 8, n fixed at 4; the m loop covers only m=1 (mr is 1), one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4224 
// k fixed at 8, m fixed at 1; n sweeps 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4240 
// Full 1x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4255 
// Sub-tile sweep for k in 1..7: n in 1..4, m loop covering only m=1,
// one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4275 
// Full 1x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4290 
// Sub-tile sweep for k in 9..15: n in 1..4, m loop covering only m=1,
// one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4310 
// Full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4325 
// Sub-tile sweep for k = 16, 24, ..., 80: n in 1..4, m loop covering only m=1,
// one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4345 
// n larger than nr(4): n in 5..7, k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4362 
// n in 5..7 with cn_stride(7); k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4380 
// 1x4c2 SSE2 LD64: n = 5..7, k = 1, 10, ..., 37, m = 1, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4400 
// 1x4c2 SSE2 LD64: n = 8 and 12 (multiples of nr = 4) with m = 1, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4417 
// 1x4c2 SSE2 LD64: n = 8 and 12 with m = 1, k = 1, 10, ..., 37, and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4435 
// 1x4c2 SSE2 LD64: n = 8 and 12, k = 1, 10, ..., 37, m = 1, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4455 
// 1x4c2 SSE2 LD64: full 1x4 tile with ks(3), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4471 
// 1x4c2 SSE2 LD64: ks(3) with k = 1, 10, ..., 37, every n in 1..4, m = 1,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4492 
// 1x4c2 SSE2 LD64: n = 5..7 with m = 1, k = 1, 10, ..., 37, and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4510 
// 1x4c2 SSE2 LD64: n = 8 and 12 with m = 1, k = 1, 10, ..., 37, and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4528 
// 1x4c2 SSE2 LD64: cm_stride(7) with k = 1, 10, ..., 37, every n in 1..4, m = 1,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4549 
// 1x4c2 SSE2 LD64: full 1x4 tile with ks(3) and a_offset(43), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4566 
// 1x4c2 SSE2 LD64: same setup as a_offset (ks(3), a_offset(43)), additionally
// setting zero_index(mz) for each mz in 0..0; k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4586 
// 1x4c2 SSE2 LD64: full 1x4 tile, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4600 
// 1x4c2 SSE2 LD64: full 1x4 tile, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4614 
// 1x4c2 SSE2 LD64: full 1x4 tile, k = 8, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4628 
// 1x4c2 SSE2 LD64: full 1x4 tile with a_zero_point(0), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4644 
// 1x4c2 SSE2 LD64: full 1x4 tile with b_zero_point(0), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4660 
// 1x4c2 SSE2 LD64: full 1x4 tile with both a_zero_point(0) and b_zero_point(0),
// k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4677 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4678 
4679 
4680 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 SSE2 LD64: single run on a full 2x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4693 
// 2x4c2 SSE2 LD64: full 2x4 tile, k = 8, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4707 
// 2x4c2 SSE2 LD64: k = 8; every (m, n) with m in 1..2 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4725 
// 2x4c2 SSE2 LD64: k = 8, n = 4; m varies over 1..2, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4741 
// 2x4c2 SSE2 LD64: k = 8, m = 2; n varies over 1..4, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4757 
// 2x4c2 SSE2 LD64: full 2x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4772 
// 2x4c2 SSE2 LD64: k in 1..7; every (m, n) with m in 1..2 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4792 
// 2x4c2 SSE2 LD64: full 2x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4807 
// 2x4c2 SSE2 LD64: k in 9..15; every (m, n) with m in 1..2 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4827 
// 2x4c2 SSE2 LD64: full 2x4 tile for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4842 
// 2x4c2 SSE2 LD64: k = 16, 24, ..., 80; every (m, n) with m in 1..2 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4862 
// 2x4c2 SSE2 LD64: n = 5..7 (above nr = 4) with m = 2, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4879 
// 2x4c2 SSE2 LD64: n = 5..7 with m = 2, k = 1, 10, ..., 37, and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4897 
// 2x4c2 SSE2 LD64: n = 5..7, k = 1, 10, ..., 37, m in 1..2, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4917 
// 2x4c2 SSE2 LD64: n = 8 and 12 (multiples of nr = 4) with m = 2, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4934 
// 2x4c2 SSE2 LD64: n = 8 and 12 with m = 2, k = 1, 10, ..., 37, and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4952 
// 2x4c2 SSE2 LD64: n = 8 and 12, k = 1, 10, ..., 37, m in 1..2, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
4972 
// 2x4c2 SSE2 LD64: full 2x4 tile with ks(3), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4988 
// 2x4c2 SSE2 LD64: ks(3) with k = 1, 10, ..., 37; every (m, n) with m in 1..2
// and n in 1..4, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
5009 
// 2x4c2 SSE2 LD64: n = 5..7 with m = 2, k = 1, 10, ..., 37, and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5027 
// 2x4c2 SSE2 LD64: n = 8 and 12 with m = 2, k = 1, 10, ..., 37, and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5045 
// 2x4c2 SSE2 LD64: cm_stride(7) with k = 1, 10, ..., 37; every (m, n) with
// m in 1..2 and n in 1..4, iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
5066 
// 2x4c2 SSE2 LD64: full 2x4 tile with ks(3) and a_offset(83), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5083 
// 2x4c2 SSE2 LD64: same setup as a_offset (ks(3), a_offset(83)), additionally
// setting zero_index(mz) for each mz in 0..1; k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5103 
// 2x4c2 SSE2 LD64: full 2x4 tile, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
5117 
// 2x4c2 SSE2 LD64: full 2x4 tile, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
5131 
// 2x4c2 SSE2 LD64: full 2x4 tile, k = 8, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
5145 
// 2x4c2 SSE2 LD64: full 2x4 tile with a_zero_point(0), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5161 
// 2x4c2 SSE2 LD64: full 2x4 tile with b_zero_point(0), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5177 
// 2x4c2 SSE2 LD64: full 2x4 tile with both a_zero_point(0) and b_zero_point(0),
// k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5194 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5195 
5196 
5197 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 SSE4.1 LD64: single run on a full 3x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
5210 
// 3x4c2 SSE4.1 LD64: full 3x4 tile, k = 8, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
5224 
// 3x4c2 SSE4.1 LD64: k = 8; every (m, n) with m in 1..3 and n in 1..4,
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5242 
// 3x4c2 SSE4.1 LD64: k = 8, n = 4; m varies over 1..3, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5258 
// 3x4c2 SSE4.1 LD64: k = 8, m = 3; n varies over 1..4, iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5274 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, with k swept over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5289 
// 3x4c2 SSE4.1 ld64 kernel: for each k in [1, 7], sweeps n over [1, 4] and
// m over [1, 3] (innermost), using iterations(1) for every shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5309 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, with k swept over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5324 
// 3x4c2 SSE4.1 ld64 kernel: for each k in [9, 15], sweeps n over [1, 4] and
// m over [1, 3] (innermost), using iterations(1) for every shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5344 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, with k taking the multiples of 8
// from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5359 
// 3x4c2 SSE4.1 ld64 kernel: for each multiple of 8 from 16 through 80 used as
// k, sweeps n over [1, 4] and m over [1, 3] (innermost) with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5379 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n swept over [5, 7], and for each n
// k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5396 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n swept over [5, 7], k taking the values
// 1, 10, 19, 28, 37, all with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5414 
// 3x4c2 SSE4.1 ld64 kernel: n swept over [5, 7], k taking the values
// 1, 10, 19, 28, 37, and m over [1, 3] (innermost), with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5434 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n taking the values 8 and 12, and for
// each n k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5451 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n taking the values 8 and 12, k taking the
// values 1, 10, 19, 28, 37, all with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5469 
// 3x4c2 SSE4.1 ld64 kernel: n taking the values 8 and 12, k taking the values
// 1, 10, 19, 28, 37, and m over [1, 3] (innermost), with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5489 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, ks(3), with k taking the values
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5505 
// 3x4c2 SSE4.1 ld64 kernel: k taking the values 1, 10, 19, 28, 37, n over
// [1, 4], m over [1, 3] (innermost), all with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5526 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n swept over [5, 7], k taking the values
// 1, 10, 19, 28, 37, all with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5544 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n taking the values 8 and 12, k taking the
// values 1, 10, 19, 28, 37, all with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5562 
// 3x4c2 SSE4.1 ld64 kernel: k taking the values 1, 10, 19, 28, 37, n over
// [1, 4], m over [1, 3] (innermost), all with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5583 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, ks(3), a_offset(127), with k taking
// the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5600 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, ks(3), a_offset(127); for each k in
// 1, 10, 19, 28, 37 the zero_index is swept over [0, 2].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5620 
// 3x4c2 SSE4.1 ld64 kernel: single m = 3, n = 4, k = 8 run with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5634 
// 3x4c2 SSE4.1 ld64 kernel: single m = 3, n = 4, k = 8 run with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5648 
// 3x4c2 SSE4.1 ld64 kernel: single m = 3, n = 4, k = 8 run with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5662 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, a_zero_point(0), with k taking the
// values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5678 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, b_zero_point(0), with k taking the
// values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5694 
// 3x4c2 SSE4.1 ld64 kernel: m = 3, n = 4, a_zero_point(0) and b_zero_point(0),
// with k taking the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5711 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5712 
5713 
5714 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 SSE2 ld64 kernel: single run with m = 4, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5727 
// 4x4c2 SSE2 ld64 kernel: single m = 4, n = 4, k = 8 run with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
5741 
// 4x4c2 SSE2 ld64 kernel: sweeps n over [1, 4] (outer) and m over [1, 4]
// (inner) with k fixed at 8, using iterations(1) for every shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5759 
// 4x4c2 SSE2 ld64 kernel: sweeps m over [1, 4] with n = 4 and k = 8,
// using iterations(1) for every shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5775 
// 4x4c2 SSE2 ld64 kernel: sweeps n over [1, 4] with m = 4 and k = 8,
// using iterations(1) for every shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5791 
// 4x4c2 SSE2 ld64 kernel: m = 4, n = 4, with k swept over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5806 
// 4x4c2 SSE2 ld64 kernel: for each k in [1, 7], sweeps n over [1, 4] and
// m over [1, 4] (innermost), using iterations(1) for every shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5826 
// 4x4c2 SSE2 ld64 kernel: m = 4, n = 4, with k swept over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5841 
// 4x4c2 SSE2 ld64 kernel: for each k in [9, 15], sweeps n over [1, 4] and
// m over [1, 4] (innermost), using iterations(1) for every shape.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5861 
// 4x4c2 SSE2 ld64 kernel: m = 4, n = 4, with k taking the multiples of 8
// from 16 through 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
5876 
// 4x4c2 SSE2 ld64 kernel: for each multiple of 8 from 16 through 80 used as
// k, sweeps n over [1, 4] and m over [1, 4] (innermost) with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5896 
// 4x4c2 SSE2 ld64 kernel: m = 4, n swept over [5, 7], and for each n
// k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5913 
// 4x4c2 SSE2 ld64 kernel: m = 4, n swept over [5, 7], k taking the values
// 1, 10, 19, 28, 37, all with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5931 
// 4x4c2 SSE2 ld64 kernel: n swept over [5, 7], k taking the values
// 1, 10, 19, 28, 37, and m over [1, 4] (innermost), with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
5951 
// 4x4c2 SSE2 ld64 kernel: m = 4, n taking the values 8 and 12, and for
// each n k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5968 
// 4x4c2 SSE2 ld64 kernel: m = 4, n taking the values 8 and 12, k taking the
// values 1, 10, 19, 28, 37, all with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
5986 
// 4x4c2 SSE2 ld64 kernel: n taking the values 8 and 12, k taking the values
// 1, 10, 19, 28, 37, and m over [1, 4] (innermost), with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
6006 
// 4x4c2 SSE2 ld64 kernel: m = 4, n = 4, ks(3), with k taking the values
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
6022 
// 4x4c2 SSE2 ld64 kernel: k taking the values 1, 10, 19, 28, 37, n over
// [1, 4], m over [1, 4] (innermost), all with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
6043 
// 4x4c2 SSE2 ld64 kernel: m = 4, n swept over [5, 7], k taking the values
// 1, 10, 19, 28, 37, all with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
6061 
// 4x4c2 SSE2 ld64 kernel: m = 4, n taking the values 8 and 12, k taking the
// values 1, 10, 19, 28, 37, all with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
6079 
// 4x4c2 SSE2 ld64 kernel: k taking the values 1, 10, 19, 28, 37, n over
// [1, 4], m over [1, 4] (innermost), all with cm_stride(7) and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
6100 
// 4x4c2 SSE2 ld64 kernel: m = 4, n = 4, ks(3), a_offset(163), with k taking
// the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
6117 
// 4x4c2 SSE2 ld64 kernel: m = 4, n = 4, ks(3), a_offset(163); for each k in
// 1, 10, 19, 28, 37 the zero_index is swept over [0, 3].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
6137 
// 4x4c2 SSE2 ld64 kernel: single m = 4, n = 4, k = 8 run with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
6151 
// 4x4c2 SSE2 ld64 kernel: single m = 4, n = 4, k = 8 run with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
6165 
// strided_cm: runs the 4x4c2 SSE2 LD64 microkernel on a 4x4 tile with k = 8
// and cm_stride(7). Gated by TEST_REQUIRES_X86_SSE2.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6179 
// no_a_zero_point: for k = 1, 10, 19, 28, 37 runs the 4x4c2 SSE2 LD64
// microkernel on a 4x4 tile with a_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE2.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6195 
// no_b_zero_point: for k = 1, 10, 19, 28, 37 runs the 4x4c2 SSE2 LD64
// microkernel on a 4x4 tile with b_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE2.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6211 
// no_zero_point: for k = 1, 10, 19, 28, 37 runs the 4x4c2 SSE2 LD64
// microkernel on a 4x4 tile with both a_zero_point(0) and b_zero_point(0).
// Gated by TEST_REQUIRES_X86_SSE2.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6228 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6229 
6230 
6231 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: runs the 1x4c2 AVX LD64 microkernel on a 1x4 tile with k = 8.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6244 
// strided_cn: runs the 1x4c2 AVX LD64 microkernel on a 1x4 tile with k = 8
// and cn_stride(7). Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6258 
// k_eq_8_subtile: with k = 8, sweeps n = 1..4 and m = 1 (the m loop has a
// single iteration because mr is 1), one iteration per configuration.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6276 
// k_eq_8_subtile_m: with k = 8 and n = 4, sweeps m over 1..1 (a single
// value), one iteration per configuration. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6292 
// k_eq_8_subtile_n: with k = 8 and m = 1, sweeps n = 1..4, one iteration per
// configuration. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6308 
// k_lt_8: runs the 1x4c2 AVX LD64 microkernel on a 1x4 tile for every
// k in 1..7. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6323 
// k_lt_8_subtile: for every k in 1..7, sweeps n = 1..4 and m = 1, one
// iteration per configuration. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6343 
// k_gt_8: runs the 1x4c2 AVX LD64 microkernel on a 1x4 tile for every
// k in 9..15. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6358 
// k_gt_8_subtile: for every k in 9..15, sweeps n = 1..4 and m = 1, one
// iteration per configuration. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6378 
// k_div_8: runs the 1x4c2 AVX LD64 microkernel on a 1x4 tile for
// k = 16, 24, ..., 80 (multiples of 8). Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6393 
// k_div_8_subtile: for k = 16, 24, ..., 80, sweeps n = 1..4 and m = 1, one
// iteration per configuration. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6413 
// n_gt_4: for n = 5..7 (above nr = 4) and k = 1, 10, 19, 28, 37, runs the
// 1x4c2 AVX LD64 microkernel with m = 1. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6430 
// n_gt_4_strided_cn: for n = 5..7 and k = 1, 10, 19, 28, 37, runs the 1x4c2
// AVX LD64 microkernel with m = 1 and cn_stride(7).
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6448 
// n_gt_4_subtile: for n = 5..7, k = 1, 10, 19, 28, 37 and m = 1, runs the
// 1x4c2 AVX LD64 microkernel with one iteration per configuration.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6468 
// n_div_4: for n = 8 and 12 (multiples of nr = 4) and k = 1, 10, 19, 28, 37,
// runs the 1x4c2 AVX LD64 microkernel with m = 1.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6485 
// n_div_4_strided_cn: for n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the
// 1x4c2 AVX LD64 microkernel with m = 1 and cn_stride(7).
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6503 
// n_div_4_subtile: for n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1, runs
// the 1x4c2 AVX LD64 microkernel with one iteration per configuration.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6523 
// small_kernel: for k = 1, 10, 19, 28, 37 runs the 1x4c2 AVX LD64 microkernel
// on a 1x4 tile with ks(3). Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6539 
// small_kernel_subtile: for k = 1, 10, 19, 28, 37, n = 1..4 and m = 1, runs
// the 1x4c2 AVX LD64 microkernel with ks(3), one iteration per configuration.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6560 
// n_gt_4_small_kernel: for n = 5..7 and k = 1, 10, 19, 28, 37, runs the 1x4c2
// AVX LD64 microkernel with m = 1 and ks(3). Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6578 
// n_div_4_small_kernel: for n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the
// 1x4c2 AVX LD64 microkernel with m = 1 and ks(3).
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6596 
// strided_cm_subtile: for k = 1, 10, 19, 28, 37, n = 1..4 and m = 1, runs the
// 1x4c2 AVX LD64 microkernel with cm_stride(7), one iteration per
// configuration. Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6617 
// a_offset: for k = 1, 10, 19, 28, 37 runs the 1x4c2 AVX LD64 microkernel on
// a 1x4 tile with ks(3) and a_offset(43). Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6634 
// zero: same configuration as the a_offset case (ks(3), a_offset(43)), and
// additionally sets zero_index(mz); the mz loop only visits mz = 0.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6654 
// qmin: runs the 1x4c2 AVX LD64 microkernel on a 1x4 tile with k = 8 and
// qmin(128). Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6668 
// qmax: runs the 1x4c2 AVX LD64 microkernel on a 1x4 tile with k = 8 and
// qmax(128). Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6682 
// strided_cm: runs the 1x4c2 AVX LD64 microkernel on a 1x4 tile with k = 8
// and cm_stride(7). Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6696 
// no_a_zero_point: for k = 1, 10, 19, 28, 37 runs the 1x4c2 AVX LD64
// microkernel on a 1x4 tile with a_zero_point(0).
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6712 
// no_b_zero_point: for k = 1, 10, 19, 28, 37 runs the 1x4c2 AVX LD64
// microkernel on a 1x4 tile with b_zero_point(0).
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6728 
// no_zero_point: for k = 1, 10, 19, 28, 37 runs the 1x4c2 AVX LD64
// microkernel on a 1x4 tile with both a_zero_point(0) and b_zero_point(0).
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6745 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6746 
6747 
6748 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: runs the 1x4c2 XOP LD64 microkernel on a 1x4 tile with k = 8.
// Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6761 
// strided_cn: runs the 1x4c2 XOP LD64 microkernel on a 1x4 tile with k = 8
// and cn_stride(7). Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6775 
// k_eq_8_subtile: with k = 8, sweeps n = 1..4 and m = 1 (the m loop has a
// single iteration because mr is 1), one iteration per configuration.
// Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6793 
// k_eq_8_subtile_m: with k = 8 and n = 4, sweeps m over 1..1 (a single
// value), one iteration per configuration. Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6809 
// k_eq_8_subtile_n: with k = 8 and m = 1, sweeps n = 1..4, one iteration per
// configuration. Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6825 
// k_lt_8: runs the 1x4c2 XOP LD64 microkernel on a 1x4 tile for every
// k in 1..7. Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6840 
// k_lt_8_subtile: for every k in 1..7, sweeps n = 1..4 and m = 1, one
// iteration per configuration. Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6860 
// k_gt_8: runs the 1x4c2 XOP LD64 microkernel on a 1x4 tile for every
// k in 9..15. Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6875 
// k_gt_8_subtile: for every k in 9..15, sweeps n = 1..4 and m = 1, one
// iteration per configuration. Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6895 
// k_div_8: runs the 1x4c2 XOP LD64 microkernel on a 1x4 tile for
// k = 16, 24, ..., 80 (multiples of 8). Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6910 
// k_div_8_subtile: for k = 16, 24, ..., 80, sweeps n = 1..4 and m = 1, one
// iteration per configuration. Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6930 
// n_gt_4: for n = 5..7 (above nr = 4) and k = 1, 10, 19, 28, 37, runs the
// 1x4c2 XOP LD64 microkernel with m = 1. Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6947 
// n_gt_4_strided_cn: for n = 5..7 and k = 1, 10, 19, 28, 37, runs the 1x4c2
// XOP LD64 microkernel with m = 1 and cn_stride(7).
// Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6965 
// n_gt_4_subtile: for n = 5..7, k = 1, 10, 19, 28, 37 and m = 1, runs the
// 1x4c2 XOP LD64 microkernel with one iteration per configuration.
// Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
6985 
// n_div_4: for n = 8 and 12 (multiples of nr = 4) and k = 1, 10, 19, 28, 37,
// runs the 1x4c2 XOP LD64 microkernel with m = 1.
// Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7002 
// n_div_4_strided_cn: for n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the
// 1x4c2 XOP LD64 microkernel with m = 1 and cn_stride(7).
// Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7020 
// n_div_4_subtile: for n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1, runs
// the 1x4c2 XOP LD64 microkernel with one iteration per configuration.
// Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
7040 
// small_kernel: for k = 1, 10, 19, 28, 37 runs the 1x4c2 XOP LD64 microkernel
// on a 1x4 tile with ks(3). Gated by TEST_REQUIRES_X86_XOP.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7056 
// small_kernel_subtile: for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1,
// runs the 1x4c2 XOP ld64 IGEMM microkernel with ks = 3 and one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7077 
// n_gt_4_small_kernel: for n in 5..7 and k in {1, 10, 19, 28, 37}, runs the
// 1x4c2 XOP ld64 IGEMM microkernel with m = 1 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7095 
// n_div_4_small_kernel: for n in {8, 12} and k in {1, 10, 19, 28, 37}, runs
// the 1x4c2 XOP ld64 IGEMM microkernel with m = 1 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7113 
// strided_cm_subtile: for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1, runs
// the 1x4c2 XOP ld64 IGEMM microkernel with cm_stride = 7 and one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7134 
// a_offset: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 XOP ld64 IGEMM
// microkernel with m = 1, n = 4, ks = 3 and a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7151 
// zero: for k in {1, 10, 19, 28, 37} and zero_index = 0, runs the 1x4c2 XOP
// ld64 IGEMM microkernel with m = 1, n = 4, ks = 3 and a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(k_val)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7171 
// qmin: runs the 1x4c2 XOP ld64 IGEMM microkernel once with m = 1, n = 4,
// k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7185 
// qmax: runs the 1x4c2 XOP ld64 IGEMM microkernel once with m = 1, n = 4,
// k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7199 
// strided_cm: runs the 1x4c2 XOP ld64 IGEMM microkernel once with m = 1,
// n = 4, k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7213 
// no_a_zero_point: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 XOP ld64 IGEMM
// microkernel with m = 1, n = 4 and a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7229 
// no_b_zero_point: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 XOP ld64 IGEMM
// microkernel with m = 1, n = 4 and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7245 
// no_zero_point: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 XOP ld64 IGEMM
// microkernel with m = 1, n = 4 and both a_zero_point and b_zero_point at 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7262 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7263 
7264 
7265 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: runs the 1x4c2 SSE2 ld128 IGEMM microkernel once with m = 1, n = 4
// and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7278 
// strided_cn: runs the 1x4c2 SSE2 ld128 IGEMM microkernel once with m = 1,
// n = 4, k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7292 
// k_eq_8_subtile: for n in 1..4 and m = 1, runs the 1x4c2 SSE2 ld128 IGEMM
// microkernel with k = 8 and a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7310 
// k_eq_8_subtile_m: for m = 1, runs the 1x4c2 SSE2 ld128 IGEMM microkernel
// with n = 4, k = 8 and a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7326 
// k_eq_8_subtile_n: for n in 1..4, runs the 1x4c2 SSE2 ld128 IGEMM microkernel
// with m = 1, k = 8 and a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7342 
// k_lt_8: for k in 1..7, runs the 1x4c2 SSE2 ld128 IGEMM microkernel with
// m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7357 
// k_lt_8_subtile: for k in 1..7, n in 1..4 and m = 1, runs the 1x4c2 SSE2
// ld128 IGEMM microkernel with a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7377 
// k_gt_8: for k in 9..15, runs the 1x4c2 SSE2 ld128 IGEMM microkernel with
// m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7392 
// k_gt_8_subtile: for k in 9..15, n in 1..4 and m = 1, runs the 1x4c2 SSE2
// ld128 IGEMM microkernel with a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7412 
// k_div_8: for k = 16, 24, ..., 80, runs the 1x4c2 SSE2 ld128 IGEMM
// microkernel with m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7427 
// k_div_8_subtile: for k = 16, 24, ..., 80, n in 1..4 and m = 1, runs the
// 1x4c2 SSE2 ld128 IGEMM microkernel with a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7447 
// n_gt_4: for n in 5..7 and k in {1, 10, 19, 28, 37}, runs the 1x4c2 SSE2
// ld128 IGEMM microkernel with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7464 
// n_gt_4_strided_cn: for n in 5..7 and k in {1, 10, 19, 28, 37}, runs the
// 1x4c2 SSE2 ld128 IGEMM microkernel with m = 1 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7482 
// n_gt_4_subtile: for n in 5..7, k in {1, 10, 19, 28, 37} and m = 1, runs the
// 1x4c2 SSE2 ld128 IGEMM microkernel with a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7502 
// n_div_4: for n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the 1x4c2 SSE2
// ld128 IGEMM microkernel with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7519 
// n_div_4_strided_cn: for n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the
// 1x4c2 SSE2 ld128 IGEMM microkernel with m = 1 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7537 
// n_div_4_subtile: for n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1, runs
// the 1x4c2 SSE2 ld128 IGEMM microkernel with a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7557 
// small_kernel: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 SSE2 ld128 IGEMM
// microkernel with m = 1, n = 4 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7573 
// small_kernel_subtile: for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1,
// runs the 1x4c2 SSE2 ld128 IGEMM microkernel with ks = 3 and one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7594 
// n_gt_4_small_kernel: for n in 5..7 and k in {1, 10, 19, 28, 37}, runs the
// 1x4c2 SSE2 ld128 IGEMM microkernel with m = 1 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7612 
// n_div_4_small_kernel: for n in {8, 12} and k in {1, 10, 19, 28, 37}, runs
// the 1x4c2 SSE2 ld128 IGEMM microkernel with m = 1 and ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7630 
// strided_cm_subtile: for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1, runs
// the 1x4c2 SSE2 ld128 IGEMM microkernel with cm_stride = 7 and one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7651 
// a_offset: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 SSE2 ld128 IGEMM
// microkernel with m = 1, n = 4, ks = 3 and a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7668 
// zero: for k in {1, 10, 19, 28, 37} and zero_index = 0, runs the 1x4c2 SSE2
// ld128 IGEMM microkernel with m = 1, n = 4, ks = 3 and a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(k_val)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7688 
// qmin: runs the 1x4c2 SSE2 ld128 IGEMM microkernel once with m = 1, n = 4,
// k = 8 and qmin = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7702 
// qmax: runs the 1x4c2 SSE2 ld128 IGEMM microkernel once with m = 1, n = 4,
// k = 8 and qmax = 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7716 
// strided_cm: runs the 1x4c2 SSE2 ld128 IGEMM microkernel once with m = 1,
// n = 4, k = 8 and cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7730 
// no_a_zero_point: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 SSE2 ld128
// IGEMM microkernel with m = 1, n = 4 and a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7746 
// no_b_zero_point: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 SSE2 ld128
// IGEMM microkernel with m = 1, n = 4 and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7762 
// no_zero_point: for k in {1, 10, 19, 28, 37}, runs the 1x4c2 SSE2 ld128 IGEMM
// microkernel with m = 1, n = 4 and both a_zero_point and b_zero_point at 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7779 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7780 
7781 
7782 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: runs the 1x4c2 SSE4.1 ld128 IGEMM microkernel once with m = 1,
// n = 4 and k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7795 
// strided_cn: runs the 1x4c2 SSE4.1 ld128 IGEMM microkernel once with m = 1,
// n = 4, k = 8 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
7809 
// k_eq_8_subtile: for n in 1..4 and m = 1, runs the 1x4c2 SSE4.1 ld128 IGEMM
// microkernel with k = 8 and a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7827 
// k_eq_8_subtile_m: for m = 1, runs the 1x4c2 SSE4.1 ld128 IGEMM microkernel
// with n = 4, k = 8 and a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7843 
// k_eq_8_subtile_n: for n in 1..4, runs the 1x4c2 SSE4.1 ld128 IGEMM
// microkernel with m = 1, k = 8 and a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7859 
// k_lt_8: for k in 1..7, runs the 1x4c2 SSE4.1 ld128 IGEMM microkernel with
// m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7874 
// k_lt_8_subtile: for k in 1..7, n in 1..4 and m = 1, runs the 1x4c2 SSE4.1
// ld128 IGEMM microkernel with a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7894 
// k_gt_8: for k in 9..15, runs the 1x4c2 SSE4.1 ld128 IGEMM microkernel with
// m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7909 
// k_gt_8_subtile: for k in 9..15, n in 1..4 and m = 1, runs the 1x4c2 SSE4.1
// ld128 IGEMM microkernel with a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7929 
// k_div_8: for k = 16, 24, ..., 80, runs the 1x4c2 SSE4.1 ld128 IGEMM
// microkernel with m = 1 and n = 4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7944 
// k_div_8_subtile: for k = 16, 24, ..., 80, n in 1..4 and m = 1, runs the
// 1x4c2 SSE4.1 ld128 IGEMM microkernel with a single tester iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
7964 
// n_gt_4: runs with n in 5..7 (above nr=4) and m=1, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7981 
// n_gt_4_strided_cn: same sweep as n_gt_4 (n in 5..7, k in {1, 10, 19, 28, 37})
// but with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7999 
// n_gt_4_subtile: n in 5..7, k in {1, 10, 19, 28, 37}, m swept over 1..1,
// with a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8019 
// n_div_4: runs with n in {8, 12} (multiples of nr=4) and m=1,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8036 
// n_div_4_strided_cn: same sweep as n_div_4 (n in {8, 12}, k in {1, 10, 19, 28, 37})
// but with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8054 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m swept over 1..1,
// with a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8074 
// small_kernel: full 1x4 tile (m=1, n=4) with ks set to 3,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8090 
// small_kernel_subtile: ks=3 with k in {1, 10, 19, 28, 37}, n swept over 1..4
// and m over 1..1, a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8111 
// n_gt_4_small_kernel: ks=3 with n in 5..7 (above nr=4), m=1,
// and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8129 
// n_div_4_small_kernel: ks=3 with n in {8, 12} (multiples of nr=4), m=1,
// and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8147 
// strided_cm_subtile: cm_stride=7 with k in {1, 10, 19, 28, 37}, n swept over 1..4
// and m over 1..1, a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8168 
// a_offset: full 1x4 tile with ks=3 and a_offset=43,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8185 
// zero: full 1x4 tile with ks=3 and a_offset=43; zero_index takes each value
// mz in 0..0 (one per row of the mr=1 tile), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8205 
// qmin: single run on the full 1x4 tile at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8219 
// qmax: single run on the full 1x4 tile at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8233 
// strided_cm: single run on the full 1x4 tile at k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8247 
// no_a_zero_point: full 1x4 tile with a_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8263 
// no_b_zero_point: full 1x4 tile with b_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8279 
// no_zero_point: full 1x4 tile with both a_zero_point and b_zero_point forced
// to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8296 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8297 
8298 
8299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run on the full 2x4 tile (m=2, n=4) with k=8.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8312 
// strided_cn: single run on the full 2x4 tile at k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8326 
// k_eq_8_subtile: k=8 with n swept over 1..4 and m over 1..2,
// a single iteration per (m, n) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8344 
// k_eq_8_subtile_m: k=8 and n=4 with m swept over 1..2,
// a single iteration per value of m.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8360 
// k_eq_8_subtile_n: k=8 and m=2 with n swept over 1..4,
// a single iteration per value of n.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8376 
// k_lt_8: runs the full 2x4 tile (m=2, n=4) once for each k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8391 
// k_lt_8_subtile: for each k in 1..7, sweeps n over 1..4 and m over 1..2,
// running a single iteration per (m, n, k) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8411 
// k_gt_8: runs the full 2x4 tile (m=2, n=4) once for each k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8426 
// k_gt_8_subtile: for each k in 9..15, sweeps n over 1..4 and m over 1..2,
// running a single iteration per (m, n, k) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8446 
// k_div_8: runs the full 2x4 tile (m=2, n=4) for k = 16, 24, ..., 80
// (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8461 
// k_div_8_subtile: for k = 16, 24, ..., 80, sweeps n over 1..4 and m over 1..2,
// running a single iteration per (m, n, k) combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8481 
// n_gt_4: runs with n in 5..7 (above nr=4) and m=2, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8498 
// n_gt_4_strided_cn: same sweep as n_gt_4 (n in 5..7, k in {1, 10, 19, 28, 37})
// but with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8516 
// n_gt_4_subtile: n in 5..7, k in {1, 10, 19, 28, 37}, m swept over 1..2,
// with a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8536 
// n_div_4: runs with n in {8, 12} (multiples of nr=4) and m=2,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8553 
// n_div_4_strided_cn: same sweep as n_div_4 (n in {8, 12}, k in {1, 10, 19, 28, 37})
// but with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8571 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, m swept over 1..2,
// with a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8591 
// small_kernel: full 2x4 tile (m=2, n=4) with ks set to 3,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8607 
// small_kernel_subtile: ks=3 with k in {1, 10, 19, 28, 37}, n swept over 1..4
// and m over 1..2, a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8628 
// n_gt_4_small_kernel: ks=3 with n in 5..7 (above nr=4), m=2,
// and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8646 
// n_div_4_small_kernel: ks=3 with n in {8, 12} (multiples of nr=4), m=2,
// and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8664 
// strided_cm_subtile: cm_stride=7 with k in {1, 10, 19, 28, 37}, n swept over 1..4
// and m over 1..2, a single iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
8685 
// a_offset: full 2x4 tile with ks=3 and a_offset=83,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8702 
// zero: full 2x4 tile with ks=3 and a_offset=83; zero_index takes each value
// mz in 0..1 (one per row of the mr=2 tile), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
8722 
// qmin: single run on the full 2x4 tile at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8736 
// qmax: single run on the full 2x4 tile at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8750 
// strided_cm: single run on the full 2x4 tile at k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8764 
// no_a_zero_point: full 2x4 tile with a_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8780 
// no_b_zero_point: full 2x4 tile with b_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8796 
// no_zero_point: full 2x4 tile with both a_zero_point and b_zero_point forced
// to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8813 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8814 
8815 
8816 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run on the full 2x4 tile (m=2, n=4) with k=8.
// Guarded by TEST_REQUIRES_X86_SSE41.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8829 
// strided_cn: single run on the full 2x4 tile at k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
8843 
// k = 8 for every (n, m) with n in 1..4 and m in 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
8861 
// k = 8, n = 4, with the row count m varied over 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8877 
// k = 8, m = 2, with the column count n varied over 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8893 
// Full 2x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8908 
// Every k in 1..7 crossed with n in 1..4 and m in 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
8928 
// Full 2x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8943 
// Every k in 9..15 crossed with n in 1..4 and m in 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
8963 
// Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8978 
// k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
8998 
// n in 5..7 (above nr = 4) crossed with k in {1, 10, 19, 28, 37}; m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9015 
// n in 5..7 crossed with k in {1, 10, 19, 28, 37}; m = 2, cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9033 
// n in 5..7 crossed with k in {1, 10, 19, 28, 37} and m in 1..2;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9053 
// n in {8, 12} (multiples of nr = 4) crossed with k in {1, 10, 19, 28, 37};
// m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9070 
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 2, cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9088 
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in 1..2;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9108 
// Full 2x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9124 
// ks = 3 for k in {1, 10, 19, 28, 37} crossed with n in 1..4 and m in 1..2;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9145 
// ks = 3 with n in 5..7 crossed with k in {1, 10, 19, 28, 37}; m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9163 
// ks = 3 with n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9181 
// cm_stride = 7 for k in {1, 10, 19, 28, 37} crossed with n in 1..4 and
// m in 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9202 
// Full 2x4 tile with ks = 3 and a_offset = 83, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9219 
// ks = 3 and a_offset = 83 with zero_index swept over 0..1 (each of the
// mr = 2 rows), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t mz_idx = 0; mz_idx <= 1; ++mz_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(mz_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9239 
// Full 2x4 tile, k = 8, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9253 
// Full 2x4 tile, k = 8, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9267 
// Full 2x4 tile, k = 8, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9281 
// Full 2x4 tile with a_zero_point forced to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9297 
// Full 2x4 tile with b_zero_point forced to 0, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9313 
// Full 2x4 tile with both a_zero_point and b_zero_point forced to 0,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9330 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9331 
9332 
9333 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full 4x4 tile with k fixed at 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9346 
// Full 4x4 tile, k = 8, with cn_stride set to 7 (larger than nr = 4).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
9360 
// k = 8 for every (n, m) with n in 1..4 and m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9378 
// k = 8, n = 4, with the row count m varied over 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9394 
// k = 8, m = 4, with the column count n varied over 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9410 
// Full 4x4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9425 
// Every k in 1..7 crossed with n in 1..4 and m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9445 
// Full 4x4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9460 
// Every k in 9..15 crossed with n in 1..4 and m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9480 
// Full 4x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9495 
// k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9515 
// n in 5..7 (above nr = 4) crossed with k in {1, 10, 19, 28, 37}; m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9532 
// n in 5..7 crossed with k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9550 
// n in 5..7 crossed with k in {1, 10, 19, 28, 37} and m in 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9570 
// n in {8, 12} (multiples of nr = 4) crossed with k in {1, 10, 19, 28, 37};
// m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9587 
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 4, cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9605 
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9625 
// Full 4x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
9641 
// ks = 3 for k in {1, 10, 19, 28, 37} crossed with n in 1..4 and m in 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9662 
// ks = 3 with n in 5..7 crossed with k in {1, 10, 19, 28, 37}; m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9680 
// ks = 3 with n in {8, 12} crossed with k in {1, 10, 19, 28, 37}; m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
9698 
// cm_stride = 7 for k in {1, 10, 19, 28, 37} crossed with n in 1..4 and
// m in 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
9719 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(163).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(4).nr(4).kr(2).sr(1)
                      .m(4).n(4).k(depth)
                      .ks(3)
                      .a_offset(163);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
9736 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  // Same setup as the a_offset case, additionally sweeping zero_index over 0..3.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
                        .mr(4).nr(4).kr(2).sr(1)
                        .m(4).n(4).k(depth)
                        .ks(3)
                        .a_offset(163)
                        .zero_index(zero_idx);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
9756 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile at k = 8 with qmin(128).
  auto tester = GemmMicrokernelTester()
                    .mr(4).nr(4).kr(2).sr(1)
                    .m(4).n(4).k(8)
                    .qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
9770 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile at k = 8 with qmax(128).
  auto tester = GemmMicrokernelTester()
                    .mr(4).nr(4).kr(2).sr(1)
                    .m(4).n(4).k(8)
                    .qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
9784 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile at k = 8 with cm_stride(7).
  auto tester = GemmMicrokernelTester()
                    .mr(4).nr(4).kr(2).sr(1)
                    .m(4).n(4).k(8)
                    .cm_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
9798 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for k = 1, 10, 19, 28, 37 with a_zero_point(0).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(4).nr(4).kr(2).sr(1)
                      .m(4).n(4).k(depth)
                      .a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
9814 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for k = 1, 10, 19, 28, 37 with b_zero_point(0).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(4).nr(4).kr(2).sr(1)
                      .m(4).n(4).k(depth)
                      .b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
9830 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for k = 1, 10, 19, 28, 37 with both zero points set to 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(4).nr(4).kr(2).sr(1)
                      .m(4).n(4).k(depth)
                      .a_zero_point(0)
                      .b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
9847 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9848 
9849 
9850 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run on the full 1x4 tile with k = 8.
  auto tester = GemmMicrokernelTester()
                    .mr(1).nr(4).kr(2).sr(1)
                    .m(1).n(4).k(8);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
9863 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile at k = 8 with cn_stride(7).
  auto tester = GemmMicrokernelTester()
                    .mr(1).nr(4).kr(2).sr(1)
                    .m(1).n(4).k(8)
                    .cn_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
9877 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 1..4 and m = 1 (the m loop makes a single pass) at k = 8; one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      auto tester = GemmMicrokernelTester()
                        .mr(1).nr(4).kr(2).sr(1)
                        .m(rows).n(cols).k(8)
                        .iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
9895 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  // m = 1 only (single-pass loop), n = 4, k = 8; one iteration.
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(rows).n(4).k(8)
                      .iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
9911 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  // n = 1..4 with m = 1 and k = 8; one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(cols).k(8)
                      .iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
9927 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile for every k in 1..7.
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
9942 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 1..7, n = 1..4, m = 1 (single-pass loop); one iteration each.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(1).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
9962 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile for every k in 9..15.
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
9977 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 9..15, n = 1..4, m = 1 (single-pass loop); one iteration each.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(1).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
9997 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10012 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 16, 24, ..., 80; n = 1..4; m = 1 (single-pass loop); one iteration each.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(1).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10032 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m = 1.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester()
                        .mr(1).nr(4).kr(2).sr(1)
                        .m(1).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10049 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m = 1; cn_stride(7).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester()
                        .mr(1).nr(4).kr(2).sr(1)
                        .m(1).n(cols).k(depth)
                        .cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10067 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7; k = 1, 10, 19, 28, 37; m = 1 (single-pass loop); one iteration each.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(1).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10087 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m = 1.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester()
                        .mr(1).nr(4).kr(2).sr(1)
                        .m(1).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10104 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m = 1; cn_stride(7).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester()
                        .mr(1).nr(4).kr(2).sr(1)
                        .m(1).n(cols).k(depth)
                        .cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10122 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12; k = 1, 10, 19, 28, 37; m = 1 (single-pass loop); one iteration each.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(1).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10142 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile for k = 1, 10, 19, 28, 37 with ks(3).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(4).k(depth)
                      .ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10158 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 1, 10, 19, 28, 37; n = 1..4; m = 1 (single-pass loop); ks(3); one iteration each.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(1).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .ks(3)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10179 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m = 1; ks(3).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester()
                        .mr(1).nr(4).kr(2).sr(1)
                        .m(1).n(cols).k(depth)
                        .ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10197 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m = 1; ks(3).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester()
                        .mr(1).nr(4).kr(2).sr(1)
                        .m(1).n(cols).k(depth)
                        .ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10215 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 1, 10, 19, 28, 37; n = 1..4; m = 1 (single-pass loop); cm_stride(7);
  // one iteration each.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(1).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .cm_stride(7)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10236 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(43).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(4).k(depth)
                      .ks(3)
                      .a_offset(43);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10253 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  // Same setup as the a_offset case with zero_index(0); the zero_index loop
  // makes a single pass.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
                        .mr(1).nr(4).kr(2).sr(1)
                        .m(1).n(4).k(depth)
                        .ks(3)
                        .a_offset(43)
                        .zero_index(zero_idx);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10273 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile at k = 8 with qmin(128).
  auto tester = GemmMicrokernelTester()
                    .mr(1).nr(4).kr(2).sr(1)
                    .m(1).n(4).k(8)
                    .qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
10287 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile at k = 8 with qmax(128).
  auto tester = GemmMicrokernelTester()
                    .mr(1).nr(4).kr(2).sr(1)
                    .m(1).n(4).k(8)
                    .qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
10301 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile at k = 8 with cm_stride(7).
  auto tester = GemmMicrokernelTester()
                    .mr(1).nr(4).kr(2).sr(1)
                    .m(1).n(4).k(8)
                    .cm_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
10315 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile for k = 1, 10, 19, 28, 37 with a_zero_point(0).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(4).k(depth)
                      .a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10331 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile for k = 1, 10, 19, 28, 37 with b_zero_point(0).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(4).k(depth)
                      .b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10347 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Full 1x4 tile for k = 1, 10, 19, 28, 37 with both zero points set to 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester()
                      .mr(1).nr(4).kr(2).sr(1)
                      .m(1).n(4).k(depth)
                      .a_zero_point(0)
                      .b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10364 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10365 
10366 
10367 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run on the full 2x4 tile with k = 8.
  auto tester = GemmMicrokernelTester()
                    .mr(2).nr(4).kr(2).sr(1)
                    .m(2).n(4).k(8);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
10380 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile at k = 8 with cn_stride(7).
  auto tester = GemmMicrokernelTester()
                    .mr(2).nr(4).kr(2).sr(1)
                    .m(2).n(4).k(8)
                    .cn_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
}
10394 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 1..4 crossed with m = 1..2 at k = 8; one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      auto tester = GemmMicrokernelTester()
                        .mr(2).nr(4).kr(2).sr(1)
                        .m(rows).n(cols).k(8)
                        .iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10412 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  // m = 1..2 with n = 4 and k = 8; one iteration each.
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    auto tester = GemmMicrokernelTester()
                      .mr(2).nr(4).kr(2).sr(1)
                      .m(rows).n(4).k(8)
                      .iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10428 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  // n = 1..4 with m = 2 and k = 8; one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester()
                      .mr(2).nr(4).kr(2).sr(1)
                      .m(2).n(cols).k(8)
                      .iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10444 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile for every k in 1..7.
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester()
                      .mr(2).nr(4).kr(2).sr(1)
                      .m(2).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10459 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 1..7, n = 1..4, m = 1..2; one iteration each.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(2).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10479 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile for every k in 9..15.
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester()
                      .mr(2).nr(4).kr(2).sr(1)
                      .m(2).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10494 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 9..15, n = 1..4, m = 1..2; one iteration each.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(2).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10514 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester()
                      .mr(2).nr(4).kr(2).sr(1)
                      .m(2).n(4).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
  }
}
10529 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 16, 24, ..., 80; n = 1..4; m = 1..2; one iteration each.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(2).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10549 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m = 2.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester()
                        .mr(2).nr(4).kr(2).sr(1)
                        .m(2).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10566 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m = 2; cn_stride(7).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester()
                        .mr(2).nr(4).kr(2).sr(1)
                        .m(2).n(cols).k(depth)
                        .cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
    }
  }
}
10584 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 5..7; k = 1, 10, 19, 28, 37; m = 1..2; one iteration each.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester()
                          .mr(2).nr(4).kr(2).sr(1)
                          .m(rows).n(cols).k(depth)
                          .iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
                    xnn_init_qu8_conv_minmax_fp32_sse2_params,
                    xnn_qu8_requantize_fp32);
      }
    }
  }
}
10604 
// n_div_4: m = 2 with n in {8, 12} (multiples of nr = 4) and
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// n_div_4_strided_cn: m = 2, n in {8, 12}, k swept over 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// n_div_4_subtile: n in {8, 12}, k over 1, 10, 19, 28, 37 and m over 1..2;
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// small_kernel: full 2x4 tile with ks(3), k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// small_kernel_subtile: ks(3) with k over 1, 10, 19, 28, 37, n over 1..4 and
// m over 1..2; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// n_gt_4_small_kernel: m = 2, n swept over 5..7, k over 1, 10, 19, 28, 37,
// with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// n_div_4_small_kernel: m = 2, n in {8, 12}, k over 1, 10, 19, 28, 37,
// with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// strided_cm_subtile: cm_stride(7) with k over 1, 10, 19, 28, 37, n over 1..4
// and m over 1..2; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// a_offset: full 2x4 tile with ks(3) and a_offset(83),
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// zero: full 2x4 tile with ks(3) and a_offset(83); zero_index is swept over
// 0..1 (one value per row of the mr = 2 tile) and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// qmin: full 2x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// qmax: full 2x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// strided_cm: full 2x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// no_a_zero_point: full 2x4 tile with a_zero_point(0),
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// no_b_zero_point: full 2x4 tile with b_zero_point(0),
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// no_zero_point: full 2x4 tile with both a_zero_point(0) and b_zero_point(0),
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
10881 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10882 
10883 
10884 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 3x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// strided_cn: full 3x4 tile at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// k_eq_8_subtile: k = 8 with n over 1..4 and m over 1..3;
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// k_eq_8_subtile_m: k = 8, n = 4, m swept over 1..3; iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// k_eq_8_subtile_n: k = 8, m = 3, n swept over 1..4; iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// k_lt_8: full 3x4 tile with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// k_lt_8_subtile: k over 1..7, n over 1..4 and m over 1..3;
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// k_gt_8: full 3x4 tile with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// k_gt_8_subtile: k over 9..15, n over 1..4 and m over 1..3;
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// k_div_8: full 3x4 tile with k swept over the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// k_div_8_subtile: k over the multiples of 8 from 16 to 80, n over 1..4 and
// m over 1..3; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// n_gt_4: m = 3 with n swept over 5..7 (greater than nr = 4) and
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// n_gt_4_strided_cn: m = 3, n swept over 5..7, k swept over 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// n_gt_4_subtile: n swept over 5..7, k over 1, 10, 19, 28, 37 and m over 1..3;
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// n_div_4: m = 3 with n in {8, 12} (multiples of nr = 4) and
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// n_div_4_strided_cn: m = 3, n in {8, 12}, k swept over 1, 10, 19, 28, 37,
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// n_div_4_subtile: n in {8, 12}, k over 1, 10, 19, 28, 37 and m over 1..3;
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// small_kernel: full 3x4 tile with ks(3), k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// small_kernel_subtile: ks(3) with k over 1, 10, 19, 28, 37, n over 1..4 and
// m over 1..3; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// n_gt_4_small_kernel: m = 3, n swept over 5..7, k over 1, 10, 19, 28, 37,
// with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// n_div_4_small_kernel: m = 3, n in {8, 12}, k over 1, 10, 19, 28, 37,
// with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// strided_cm_subtile: cm_stride(7) with k over 1, 10, 19, 28, 37, n over 1..4
// and m over 1..3; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}

// a_offset: full 3x4 tile with ks(3) and a_offset(127),
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// zero: full 3x4 tile with ks(3) and a_offset(127); zero_index is swept over
// 0..2 (one value per row of the mr = 3 tile) and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// qmin: full 3x4 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// qmax: full 3x4 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// strided_cm: full 3x4 tile at k = 8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// no_a_zero_point: full 3x4 tile with a_zero_point(0),
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// no_b_zero_point: full 3x4 tile with b_zero_point(0),
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// no_zero_point: full 3x4 tile with both a_zero_point(0) and b_zero_point(0),
// k swept over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
11398 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11399 
11400 
11401 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 4x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// strided_cn: full 4x4 tile at k = 8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}

// k_eq_8_subtile: k = 8 with n over 1..4 and m over 1..4;
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}

// k_eq_8_subtile_m: k = 8, n = 4, m swept over 1..4; iterations(1) per run.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}

// k_eq_8_subtile_n: k=8, m=4, with n swept over [1, 4]; one iteration per n.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11478 
// k_lt_8: m=4, n=4 for each k from 1 through 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11493 
// k_lt_8_subtile: k in [1, 7] crossed with n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
11513 
// k_gt_8: m=4, n=4 for each k from 9 through 15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11528 
// k_gt_8_subtile: k in [9, 15] crossed with n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
11548 
// k_div_8: m=4, n=4 for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11563 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
11583 
// n_gt_4: m=4 with n in [5, 7] and k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11600 
// n_gt_4_strided_cn: m=4, cn_stride(7), with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11618 
// n_gt_4_subtile: n in [5, 7], k = 1, 10, 19, 28, 37 and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
11638 
// n_div_4: m=4 with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11655 
// n_div_4_strided_cn: m=4, cn_stride(7), with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11673 
// n_div_4_subtile: n = 8, 12, k = 1, 10, 19, 28, 37 and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
11693 
// small_kernel: m=4, n=4, ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11709 
// small_kernel_subtile: ks(3) with k = 1, 10, 19, 28, 37, n in [1, 4], m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
11730 
// n_gt_4_small_kernel: m=4, ks(3), with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11748 
// n_div_4_small_kernel: m=4, ks(3), with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11766 
// strided_cm_subtile: cm_stride(7) with k = 1, 10, 19, 28, 37, n in [1, 4], m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
11787 
// a_offset: m=4, n=4, ks(3), a_offset(163) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11804 
// zero: ks(3), a_offset(163), with zero_index swept over [0, 3] for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11824 
// qmin: single run with m=4, n=4, k=8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
11838 
// qmax: single run with m=4, n=4, k=8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
11852 
// strided_cm: single run with m=4, n=4, k=8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
11866 
// no_a_zero_point: m=4, n=4, a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11882 
// no_b_zero_point: m=4, n=4, b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11898 
// no_zero_point: m=4, n=4, a_zero_point(0) and b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11915 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
11916 
11917 
11918 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m=1, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
11931 
// strided_cn: single run with m=1, n=4, k=8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
11945 
// k_eq_8_subtile: k=8 for every n in [1, 4]; the m loop covers only m=1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11963 
// k_eq_8_subtile_m: k=8, n=4; the m loop covers only m=1. One iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11979 
// k_eq_8_subtile_n: k=8, m=1, with n swept over [1, 4]; one iteration per n.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
11995 
// k_lt_8: m=1, n=4 for each k from 1 through 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12010 
// k_lt_8_subtile: k in [1, 7] crossed with n in [1, 4]; the m loop covers only m=1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12030 
// k_gt_8: m=1, n=4 for each k from 9 through 15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12045 
// k_gt_8_subtile: k in [9, 15] crossed with n in [1, 4]; the m loop covers only m=1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12065 
// k_div_8: m=1, n=4 for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12080 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in [1, 4]; the m loop covers only m=1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12100 
// n_gt_4: m=1 with n in [5, 7] and k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12117 
// n_gt_4_strided_cn: m=1, cn_stride(7), with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12135 
// n_gt_4_subtile: n in [5, 7] and k = 1, 10, 19, 28, 37; the m loop covers only m=1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12155 
// n_div_4: m=1 with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12172 
// n_div_4_strided_cn: m=1, cn_stride(7), with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12190 
// n_div_4_subtile: n = 8, 12 and k = 1, 10, 19, 28, 37; the m loop covers only m=1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12210 
// small_kernel: m=1, n=4, ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12226 
// small_kernel_subtile: ks(3) with k = 1, 10, 19, 28, 37 and n in [1, 4]; the m loop covers only m=1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12247 
// n_gt_4_small_kernel: m=1, ks(3), with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12265 
// n_div_4_small_kernel: m=1, ks(3), with n = 8, 12 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12283 
// strided_cm_subtile: cm_stride(7) with k = 1, 10, 19, 28, 37 and n in [1, 4]; the m loop covers only m=1. One iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12304 
// a_offset: m=1, n=4, ks(3), a_offset(43) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_dim)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12321 
// Sweeps k in {1, 10, 19, 28, 37} with ks(3), a_offset(43) and zero_index taking only the value 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12341 
// Single run on a full 1x4 tile with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12355 
// Single run on a full 1x4 tile with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12369 
// Single run on a full 1x4 tile with k = 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12383 
// Sweeps k in {1, 10, 19, 28, 37} on a full 1x4 tile with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12399 
// Sweeps k in {1, 10, 19, 28, 37} on a full 1x4 tile with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12415 
// Sweeps k in {1, 10, 19, 28, 37} on a full 1x4 tile with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12432 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12433 
12434 
12435 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 1x4c2s4 SSE4.1 LD64 kernel on a full 1x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12448 
// Single run on a full 1x4 tile with k = 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12462 
// Sweeps n in [1, 4] and m = 1 with k fixed at 8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12480 
// Sweeps m (only the value 1) with n = 4 and k = 8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12496 
// Sweeps n in [1, 4] with m = 1 and k = 8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12512 
// Sweeps k in [1, 7] on a full 1x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12527 
// Sweeps k in [1, 7], n in [1, 4] and m = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12547 
// Sweeps k in [9, 15] on a full 1x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12562 
// Sweeps k in [9, 15], n in [1, 4] and m = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12582 
// Sweeps k over the multiples of 8 from 16 to 80 on a full 1x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12597 
// Sweeps k over the multiples of 8 from 16 to 80, n in [1, 4] and m = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12617 
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12634 
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} with m = 1 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12652 
// Sweeps n in [5, 7], k in {1, 10, 19, 28, 37} and m = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12672 
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12689 
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 1 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12707 
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12727 
// Sweeps k in {1, 10, 19, 28, 37} on a full 1x4 tile with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12743 
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m = 1 with ks(3), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12764 
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} with m = 1 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12782 
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 1 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12800 
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m = 1 with cm_stride(7), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
12821 
// Sweeps k in {1, 10, 19, 28, 37} on a full 1x4 tile with ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12838 
// Sweeps k in {1, 10, 19, 28, 37} with ks(3), a_offset(43) and zero_index taking only the value 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12858 
// Single run on a full 1x4 tile with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12872 
// Single run on a full 1x4 tile with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12886 
// Single run on a full 1x4 tile with k = 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12900 
// Sweeps k in {1, 10, 19, 28, 37} on a full 1x4 tile with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12916 
// Sweeps k in {1, 10, 19, 28, 37} on a full 1x4 tile with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12932 
// Sweeps k in {1, 10, 19, 28, 37} on a full 1x4 tile with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
12949 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
12950 
12951 
12952 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 2x4c2s4 SSE2 LD64 kernel on a full 2x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12965 
// Single run on a full 2x4 tile with k = 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
12979 
// Sweeps n in [1, 4] and m in [1, 2] with k fixed at 8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12997 
// Sweeps m in [1, 2] with n = 4 and k = 8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13013 
// Sweeps n in [1, 4] with m = 2 and k = 8, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13029 
// Sweeps k in [1, 7] on a full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13044 
// Sweeps k in [1, 7], n in [1, 4] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13064 
// Sweeps k in [9, 15] on a full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13079 
// Sweeps k in [9, 15], n in [1, 4] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13099 
// Sweeps k over the multiples of 8 from 16 to 80 on a full 2x4 tile.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13114 
// Sweeps k over the multiples of 8 from 16 to 80, n in [1, 4] and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13134 
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13151 
// Sweeps n in [5, 7] and k in {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13169 
// Sweeps n in [5, 7], k in {1, 10, 19, 28, 37} and m in [1, 2], one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13189 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4) {
  // n takes the multiples of 4 in 8..12 (8 and 12); k takes 1, 10, 19, 28, 37; m is fixed at 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13206 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  // n in {8, 12}, k in 1, 10, ..., 37, with cn_stride set to 7.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13224 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_subtile) {
  // n in {8, 12}, k in 1, 10, ..., 37, and every m from 1 to 2; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13244 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel) {
  // Full 2x4 tile with ks set to 3; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13260 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel_subtile) {
  // ks = 3 with every (m, n) in 1..2 x 1..4; k takes 1, 10, ..., 37; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13281 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  // n in 5..7 and k in 1, 10, ..., 37, with ks set to 3 and m fixed at 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13299 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  // n in {8, 12} and k in 1, 10, ..., 37, with ks set to 3 and m fixed at 2.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13317 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm_subtile) {
  // cm_stride = 7 with every (m, n) in 1..2 x 1..4; k takes 1, 10, ..., 37; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13338 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, a_offset) {
  // Full 2x4 tile with ks = 3 and a_offset = 83; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13355 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, zero) {
  // Same setup as a_offset (ks = 3, a_offset = 83), additionally sweeping zero_index over 0 and 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13375 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmin) {
  // Single full-tile case (m = 2, n = 4, k = 8) with qmin set to 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13389 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmax) {
  // Single full-tile case (m = 2, n = 4, k = 8) with qmax set to 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13403 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm) {
  // Single full-tile case (m = 2, n = 4, k = 8) with cm_stride set to 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13417 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, no_a_zero_point) {
  // Full 2x4 tile with a_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13433 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, no_b_zero_point) {
  // Full 2x4 tile with b_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13449 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, no_zero_point) {
  // Full 2x4 tile with both a_zero_point and b_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13466 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13467 
13468 
13469 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8) {
  // Single full-tile case: m = 4, n = 4, k = 8.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13482 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cn) {
  // Single full-tile case (m = 4, n = 4, k = 8) with cn_stride set to 7.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13496 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  // k = 8 with every (m, n) in 1..4 x 1..4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13514 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  // k = 8 and n = 4 with m swept from 1 to 4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13530 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  // k = 8 and m = 4 with n swept from 1 to 4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13546 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8) {
  // Full 4x4 tile with k swept over 1..7.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13561 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  // k in 1..7 with every (m, n) in 1..4 x 1..4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13581 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8) {
  // Full 4x4 tile with k swept over 9..15.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13596 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  // k in 9..15 with every (m, n) in 1..4 x 1..4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13616 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8) {
  // Full 4x4 tile with k taking the multiples of 8 from 16 through 80.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13631 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8_subtile) {
  // k in 16, 24, ..., 80 with every (m, n) in 1..4 x 1..4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13651 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4) {
  // n takes 5, 6, 7 (above nr = 4, below 2 * nr); k takes 1, 10, 19, 28, 37; m is fixed at 4.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13668 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  // n in 5..7 and k in 1, 10, ..., 37, with cn_stride set to 7 and m fixed at 4.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13686 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  // n in 5..7, k in 1, 10, ..., 37, and every m from 1 to 4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13706 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4) {
  // n takes the multiples of 4 in 8..12 (8 and 12); k takes 1, 10, 19, 28, 37; m is fixed at 4.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13723 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  // n in {8, 12} and k in 1, 10, ..., 37, with cn_stride set to 7 and m fixed at 4.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13741 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_subtile) {
  // n in {8, 12}, k in 1, 10, ..., 37, and every m from 1 to 4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13761 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, small_kernel) {
  // Full 4x4 tile with ks set to 3; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13777 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, small_kernel_subtile) {
  // ks = 3 with every (m, n) in 1..4 x 1..4; k takes 1, 10, ..., 37; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13798 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  // n in 5..7 and k in 1, 10, ..., 37, with ks set to 3 and m fixed at 4.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13816 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  // n in {8, 12} and k in 1, 10, ..., 37, with ks set to 3 and m fixed at 4.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13834 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm_subtile) {
  // cm_stride = 7 with every (m, n) in 1..4 x 1..4; k takes 1, 10, ..., 37; iterations is set to 1 per case.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
13855 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, a_offset) {
  // Full 4x4 tile with ks = 3 and a_offset = 163; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13872 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, zero) {
  // Same setup as a_offset (ks = 3, a_offset = 163), additionally sweeping zero_index over 0..3.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13892 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmin) {
  // Single full-tile case (m = 4, n = 4, k = 8) with qmin set to 128.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13906 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmax) {
  // Single full-tile case (m = 4, n = 4, k = 8) with qmax set to 128.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13920 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm) {
  // Single full-tile case (m = 4, n = 4, k = 8) with cm_stride set to 7.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13934 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, no_a_zero_point) {
  // Full 4x4 tile with a_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13950 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, no_b_zero_point) {
  // Full 4x4 tile with b_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13966 
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, no_zero_point) {
  // Full 4x4 tile with both a_zero_point and b_zero_point set to 0; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
13983 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
13984 
13985 
13986 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8) {
  // Single full-tile case: m = 3, n = 4, k = 8.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
13999 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cn) {
  // Single full-tile case (m = 3, n = 4, k = 8) with cn_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14013 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile) {
  // k = 8 with every (m, n) in 1..3 x 1..4; iterations is set to 1 per case.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14031 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  // k = 8 and n = 4 with m swept from 1 to 3; iterations is set to 1 per case.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14047 
// Sweeps n over [1, 4] with m=3 and k=8, with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14063 
// Sweeps k over [1, 7] with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14078 
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14098 
// Sweeps k over [9, 15] with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14113 
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14133 
// Sweeps k over the multiples of 8 from 16 through 80 with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14148 
// Sweeps k over the multiples of 8 from 16 through 80, n over [1, 4] and m over [1, 3],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14168 
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14185 
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=3 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14203 
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 3],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14223 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14240 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=3 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14258 
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 3],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14278 
// Sweeps k over {1, 10, 19, 28, 37} with m=3, n=4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14294 
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 3] with ks set to 3,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14315 
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=3 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14333 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=3 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14351 
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 3] with cm_stride set to 7,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14372 
// Sweeps k over {1, 10, 19, 28, 37} with m=3, n=4, ks set to 3 and a_offset set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14389 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over [0, 2] with m=3, n=4,
// ks set to 3 and a_offset set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_row)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14409 
// Invokes the tester with m=3, n=4, k=8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14423 
// Invokes the tester with m=3, n=4, k=8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14437 
// Invokes the tester with m=3, n=4, k=8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14451 
// Sweeps k over {1, 10, 19, 28, 37} with m=3, n=4 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14467 
// Sweeps k over {1, 10, 19, 28, 37} with m=3, n=4 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14483 
// Sweeps k over {1, 10, 19, 28, 37} with m=3, n=4 and both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14500 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
14501 
14502 
14503 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Invokes the tester for the 3x4c2s4 XOP LD64 kernel with m=3, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14516 
// Same m=3, n=4, k=8 configuration as k_eq_8, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
14530 
// Sweeps n over [1, 4] and m over [1, 3] at k=8, with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14548 
// Sweeps m over [1, 3] with n=4 and k=8, with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14564 
// Sweeps n over [1, 4] with m=3 and k=8, with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14580 
// Sweeps k over [1, 7] with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14595 
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14615 
// Sweeps k over [9, 15] with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14630 
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14650 
// Sweeps k over the multiples of 8 from 16 through 80 with m=3 and n=4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14665 
// Sweeps k over the multiples of 8 from 16 through 80, n over [1, 4] and m over [1, 3],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14685 
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14702 
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=3 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14720 
// Sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 3],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14740 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14757 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=3 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14775 
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 3],
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14795 
// Sweeps k over {1, 10, 19, 28, 37} with m=3, n=4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14811 
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 3] with ks set to 3,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14832 
// Sweeps n over [5, 7] and k over {1, 10, 19, 28, 37} with m=3 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14850 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m=3 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
14868 
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 3] with cm_stride set to 7,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
14889 
// Sweeps k over {1, 10, 19, 28, 37} with m=3, n=4, ks set to 3 and a_offset set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
14906 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  // Same configuration as the a_offset test, additionally setting
  // zero_index to each of 0, 1, 2 in turn.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_row)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14926 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile, k = 8, with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14940 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile, k = 8, with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14954 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile, k = 8, with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
14968 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with a_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
14984 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15000 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with both a_zero_point and b_zero_point forced to 0,
  // for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15017 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15018 
15019 
15020 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile with k exactly 8.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15033 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile, k = 8, with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15047 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8 over every n in [1, 4] and m in [1, 1], one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15065 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8, n = 4, m swept over [1, 1], one iteration each.
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15081 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8, m = 1, n swept over [1, 4], one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15097 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile for every k in [1, 7].
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15112 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every k in [1, 7] crossed with n in [1, 4] and m in [1, 1],
  // one iteration each.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15132 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile for every k in [9, 15].
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15147 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every k in [9, 15] crossed with n in [1, 4] and m in [1, 1],
  // one iteration each.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15167 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15182 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 1],
  // one iteration each.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15202 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n in [5, 7] (above nr = 4) crossed with k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15219 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n in [5, 7] crossed with k = 1, 10, 19, 28, 37, with cn_stride set to 7.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15237 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n in [5, 7] crossed with k = 1, 10, 19, 28, 37 and m in [1, 1],
  // one iteration each.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15257 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 (multiples of nr = 4) crossed with k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15274 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, with cn_stride set to 7.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15292 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in [1, 1],
  // one iteration each.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15312 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile with ks = 3, for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15328 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // ks = 3 with k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
  // m in [1, 1], one iteration each.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15349 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n in [5, 7] crossed with k = 1, 10, 19, 28, 37, with ks fixed at 3.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15367 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, with ks fixed at 3.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15385 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // cm_stride = 7 with k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
  // m in [1, 1], one iteration each.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15406 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile with ks = 3 and a_offset = 43, for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15423 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  // Same configuration as the a_offset test, additionally setting
  // zero_index; with mr = 1 the only index visited is 0.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15443 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile, k = 8, with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15457 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile, k = 8, with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15471 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile, k = 8, with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15485 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile with a_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15501 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15517 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Full 1x4 tile with both a_zero_point and b_zero_point forced to 0,
  // for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15534 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
15535 
15536 
15537 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile with k exactly 8.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15550 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile, k = 8, with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
15564 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 over every n in [1, 4] and m in [1, 2], one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15582 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, n = 4, m swept over [1, 2], one iteration each.
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15598 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, m = 2, n swept over [1, 4], one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15614 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile for every k in [1, 7].
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15629 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [1, 7] crossed with n in [1, 4] and m in [1, 2],
  // one iteration each.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15649 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile for every k in [9, 15].
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15664 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [9, 15] crossed with n in [1, 4] and m in [1, 2],
  // one iteration each.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15684 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
15699 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 2],
  // one iteration each.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
15719 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7] (above nr = 4) crossed with k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15736 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n in [5, 7] crossed with k = 1, 10, 19, 28, 37, with cn_stride set to 7.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
15754 
// Runs the 2x4c2s4 SSE2 ld128 kernel for n in [5, 7], k in {1, 10, 19, 28, 37}
// and m in [1, 2], with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
15774 
// Runs the 2x4c2s4 SSE2 ld128 kernel with m = 2 for n in {8, 12} (multiples of
// nr = 4) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
15791 
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 10, 19, 28, 37}, m = 2) but
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
15809 
// Runs the 2x4c2s4 SSE2 ld128 kernel for n in {8, 12}, k in {1, 10, 19, 28, 37}
// and m in [1, 2], with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
15829 
// Runs the full 2x4 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
15845 
// Runs every sub-tile m in [1, 2], n in [1, 4] with ks set to 3 for
// k in {1, 10, 19, 28, 37}, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
15866 
// Runs m = 2 with ks set to 3 for n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
15884 
// Runs m = 2 with ks set to 3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
15902 
// Runs every sub-tile m in [1, 2], n in [1, 4] with cm_stride set to 7 for
// k in {1, 10, 19, 28, 37}, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
15923 
// Runs the full 2x4 tile with ks set to 3 and a_offset set to 83 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
15940 
// Runs the full 2x4 tile with ks set to 3 and a_offset set to 83, sweeping
// zero_index over {0, 1} for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
15960 
// Runs the full 2x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
15974 
// Runs the full 2x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
15988 
// Runs the full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16002 
// Runs the full 2x4 tile with a_zero_point set to 0 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16018 
// Runs the full 2x4 tile with b_zero_point set to 0 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16034 
// Runs the full 2x4 tile with both a_zero_point and b_zero_point set to 0 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16051 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16052 
16053 
16054 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 2x4c2s4 SSE4.1 ld128 kernel on the full 2x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16067 
// Runs the full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16081 
// Runs every sub-tile m in [1, 2], n in [1, 4] at k = 8, with iterations set
// to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16099 
// Runs m in [1, 2] with n fixed at 4 and k = 8, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16115 
// Runs n in [1, 4] with m fixed at 2 and k = 8, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16131 
// Runs the full 2x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16146 
// Runs every sub-tile m in [1, 2], n in [1, 4] for every k in [1, 7], with
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16166 
// Runs the full 2x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16181 
// Runs every sub-tile m in [1, 2], n in [1, 4] for every k in [9, 15], with
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16201 
// Runs the full 2x4 tile for k in {16, 24, ..., 80}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16216 
// Runs every sub-tile m in [1, 2], n in [1, 4] for k in {16, 24, ..., 80},
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16236 
// Runs m = 2 for n in [5, 7] (above nr = 4) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16253 
// Runs m = 2 with cn_stride set to 7 for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16271 
// Runs n in [5, 7], k in {1, 10, 19, 28, 37} and m in [1, 2], with iterations
// set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16291 
// Runs m = 2 for n in {8, 12} (multiples of nr = 4) and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16308 
// Runs m = 2 with cn_stride set to 7 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16326 
// Runs n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 2], with
// iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16346 
// Runs the full 2x4 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16362 
// Runs every sub-tile m in [1, 2], n in [1, 4] with ks set to 3 for
// k in {1, 10, 19, 28, 37}, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16383 
// Runs m = 2 with ks set to 3 for n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16401 
// Runs m = 2 with ks set to 3 for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16419 
// Runs every sub-tile m in [1, 2], n in [1, 4] with cm_stride set to 7 for
// k in {1, 10, 19, 28, 37}, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
16440 
// Runs the full 2x4 tile with ks set to 3 and a_offset set to 83 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16457 
// Runs the full 2x4 tile with ks set to 3 and a_offset set to 83, sweeping
// zero_index over {0, 1} for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16477 
// Runs the full 2x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16491 
// Runs the full 2x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16505 
// Runs the full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16519 
// Runs the full 2x4 tile with a_zero_point set to 0 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16535 
// Runs the full 2x4 tile with b_zero_point set to 0 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16551 
// Runs the full 2x4 tile with both a_zero_point and b_zero_point set to 0 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
16568 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16569 
16570 
16571 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c2s4 SSE2 ld128 kernel on the full 3x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16584 
// Runs the full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
16598 
// Runs every sub-tile m in [1, 3], n in [1, 4] at k = 8, with iterations set
// to 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
16616 
// k=8, n=4 for every m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16632 
// k=8, m=3 for every n in [1, 4]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16648 
// m=3, n=4 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16663 
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16683 
// m=3, n=4 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16698 
// Every k in [9, 15] crossed with n in [1, 4] and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16718 
// m=3, n=4 for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16733 
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16753 
// m=3 for every n in [5, 7] crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16770 
// m=3, cn_stride=7 for every n in [5, 7] crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16788 
// Every n in [5, 7] crossed with k = 1, 10, 19, 28, 37 and m in [1, 3];
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16808 
// m=3 for n = 8, 12 crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16825 
// m=3, cn_stride=7 for n = 8, 12 crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16843 
// n = 8, 12 crossed with k = 1, 10, 19, 28, 37 and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16863 
// m=3, n=4, ks=3 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16879 
// ks=3 for k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and m in [1, 3];
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16900 
// m=3, ks=3 for every n in [5, 7] crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16918 
// m=3, ks=3 for n = 8, 12 crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16936 
// cm_stride=7 for k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
// m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
16957 
// m=3, n=4, ks=3, a_offset=127 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
16974 
// m=3, n=4, ks=3, a_offset=127 for k = 1, 10, 19, 28, 37 crossed with
// zero_index in [0, 2].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(4).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
16994 
// Single configuration: m=3, n=4, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17008 
// Single configuration: m=3, n=4, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17022 
// Single configuration: m=3, n=4, k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17036 
// m=3, n=4 with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17052 
// m=3, n=4 with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17068 
// m=3, n=4 with both a_zero_point and b_zero_point set to 0, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17085 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17086 
17087 
17088 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m=3, n=4, k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17101 
// Single configuration: m=3, n=4, k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17115 
// k=8 for every (n, m) pair with n in [1, 4] and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17133 
// k=8, n=4 for every m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17149 
// k=8, m=3 for every n in [1, 4]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17165 
// m=3, n=4 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17180 
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17200 
// m=3, n=4 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17215 
// Every k in [9, 15] crossed with n in [1, 4] and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17235 
// m=3, n=4 for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17250 
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17270 
// m=3 for every n in [5, 7] crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17287 
// m=3, cn_stride=7 for every n in [5, 7] crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17305 
// Every n in [5, 7] crossed with k = 1, 10, 19, 28, 37 and m in [1, 3];
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17325 
// m=3 for n = 8, 12 crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17342 
// m=3, cn_stride=7 for n = 8, 12 crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17360 
// n = 8, 12 crossed with k = 1, 10, 19, 28, 37 and m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17380 
// m=3, n=4, ks=3 for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17396 
// ks=3 for k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and m in [1, 3];
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17417 
// m=3, ks=3 for every n in [5, 7] crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17435 
// m=3, ks=3 for n = 8, 12 crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17453 
// cm_stride=7 for k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
// m in [1, 3]; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17474 
// 3x4c2s4 SSE4.1 LD128 kernel: full 3x4 tile, k in {1, 10, 19, 28, 37},
// tester ks set to 3 and a_offset set to 127.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17491 
// 3x4c2s4 SSE4.1 LD128 kernel: full 3x4 tile, k in {1, 10, 19, 28, 37},
// ks 3, a_offset 127, and zero_index swept over 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17511 
// 3x4c2s4 SSE4.1 LD128 kernel: full 3x4 tile, k = 8, tester qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17525 
// 3x4c2s4 SSE4.1 LD128 kernel: full 3x4 tile, k = 8, tester qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17539 
// 3x4c2s4 SSE4.1 LD128 kernel: full 3x4 tile, k = 8, cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17553 
// 3x4c2s4 SSE4.1 LD128 kernel: full 3x4 tile, k in {1, 10, 19, 28, 37},
// with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17569 
// 3x4c2s4 SSE4.1 LD128 kernel: full 3x4 tile, k in {1, 10, 19, 28, 37},
// with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17585 
// 3x4c2s4 SSE4.1 LD128 kernel: full 3x4 tile, k in {1, 10, 19, 28, 37},
// with both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17602 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17603 
17604 
17605 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17618 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k = 8, cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
17632 
// 1x4c2s4 AVX LD128 kernel: k = 8, n in 1..4 and m in 1..1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17650 
// 1x4c2s4 AVX LD128 kernel: k = 8, n = 4, m in 1..1, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17666 
// 1x4c2s4 AVX LD128 kernel: k = 8, m = 1, n in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17682 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile with k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17697 
// 1x4c2s4 AVX LD128 kernel: k in 1..7, n in 1..4, m in 1..1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17717 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile with k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17732 
// 1x4c2s4 AVX LD128 kernel: k in 9..15, n in 1..4, m in 1..1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17752 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile with k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17767 
// 1x4c2s4 AVX LD128 kernel: k = 16, 24, ..., 80, n in 1..4, m in 1..1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17787 
// 1x4c2s4 AVX LD128 kernel: m = 1, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17804 
// 1x4c2s4 AVX LD128 kernel: m = 1, n in 5..7, k in {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17822 
// 1x4c2s4 AVX LD128 kernel: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..1,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17842 
// 1x4c2s4 AVX LD128 kernel: m = 1, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17859 
// 1x4c2s4 AVX LD128 kernel: m = 1, n in {8, 12}, k in {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17877 
// 1x4c2s4 AVX LD128 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37},
// m in 1..1, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17897 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k in {1, 10, 19, 28, 37},
// tester ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
17913 
// 1x4c2s4 AVX LD128 kernel: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1,
// tester ks set to 3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17934 
// 1x4c2s4 AVX LD128 kernel: m = 1, n in 5..7, k in {1, 10, 19, 28, 37},
// tester ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17952 
// 1x4c2s4 AVX LD128 kernel: m = 1, n in {8, 12}, k in {1, 10, 19, 28, 37},
// tester ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
17970 
// 1x4c2s4 AVX LD128 kernel: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1,
// cm_stride set to 7, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
17991 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k in {1, 10, 19, 28, 37},
// tester ks set to 3 and a_offset set to 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18008 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k in {1, 10, 19, 28, 37},
// ks 3, a_offset 43, and zero_index taking the single value 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18028 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k = 8, tester qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18042 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k = 8, tester qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18056 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k = 8, cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18070 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k in {1, 10, 19, 28, 37},
// with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18086 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k in {1, 10, 19, 28, 37},
// with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18102 
// 1x4c2s4 AVX LD128 kernel: full 1x4 tile, k in {1, 10, 19, 28, 37},
// with both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18119 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18120 
18121 
18122 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2s4 AVX LD128 kernel: full 2x4 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18135 
// 2x4c2s4 AVX LD128 kernel: full 2x4 tile, k = 8, cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
18149 
// 2x4c2s4 AVX LD128 kernel: k = 8, n in 1..4 and m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18167 
// 2x4c2s4 AVX LD128 kernel: k = 8, n = 4, m in 1..2, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18183 
// 2x4c2s4 AVX LD128 kernel: k = 8, m = 2, n in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18199 
// 2x4c2s4 AVX LD128 kernel: full 2x4 tile with k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18214 
// 2x4c2s4 AVX LD128 kernel: k in 1..7, n in 1..4, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18234 
// 2x4c2s4 AVX LD128 kernel: full 2x4 tile with k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18249 
// 2x4c2s4 AVX LD128 kernel: k in 9..15, n in 1..4, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18269 
// 2x4c2s4 AVX LD128 kernel: full 2x4 tile with k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
18284 
// 2x4c2s4 AVX LD128 kernel: k = 16, 24, ..., 80, n in 1..4, m in 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
18304 
// 2x4c2s4 AVX LD128 kernel: m = 2, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
18321 
// n in 5..7 with m=2 and cn_stride set to 7; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18339 
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18359 
// n in {8, 12} (multiples of nr=4) with m=2; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18376 
// n in {8, 12} with m=2 and cn_stride set to 7; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18394 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18414 
// Full 2x4 tile with ks set to 3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18430 
// ks set to 3; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18451 
// n in 5..7 with m=2 and ks set to 3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18469 
// n in {8, 12} with m=2 and ks set to 3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18487 
// cm_stride set to 7; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18508 
// Full 2x4 tile with ks set to 3 and a_offset set to 83;
// k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18525 
// Same setup as a_offset (ks=3, a_offset=83), additionally passing each
// zero_index in 0..1; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(depth)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18545 
// Full 2x4 tile at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18559 
// Full 2x4 tile at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18573 
// Full 2x4 tile at k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18587 
// Full 2x4 tile with a_zero_point set to 0; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18603 
// Full 2x4 tile with b_zero_point set to 0; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18619 
// Full 2x4 tile with both a_zero_point and b_zero_point set to 0;
// k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18636 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18637 
18638 
18639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18652 
// Full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
18666 
// k=8 with every (n, m) for n in 1..4 and m in 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18684 
// k=8, n=4, with m swept over 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18700 
// k=8, m=3, with n swept over 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18716 
// Full 3x4 tile with k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18731 
// k in 1..7 with every (n, m) for n in 1..4 and m in 1..3;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18751 
// Full 3x4 tile with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18766 
// k in 9..15 with every (n, m) for n in 1..4 and m in 1..3;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18786 
// Full 3x4 tile with k swept over 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18801 
// k swept over 16, 24, ..., 80; every (n, m) with n in 1..4 and m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18821 
// n in 5..7 (above nr=4) with m=3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18838 
// n in 5..7 with m=3 and cn_stride set to 7; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18856 
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18876 
// n in {8, 12} (multiples of nr=4) with m=3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18893 
// n in {8, 12} with m=3 and cn_stride set to 7; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18911 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18931 
// Full 3x4 tile with ks set to 3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
18947 
// ks set to 3; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
18968 
// n in 5..7 with m=3 and ks set to 3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
18986 
// n in {8, 12} with m=3 and ks set to 3; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
19004 
// cm_stride set to 7; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
19025 
// Full 3x4 tile with ks set to 3 and a_offset set to 127;
// k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19042 
// Same setup as a_offset (ks=3, a_offset=127), additionally passing each
// zero_index in 0..2; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(depth)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
19062 
// Full 3x4 tile at k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19076 
// Full 3x4 tile at k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19090 
// Full 3x4 tile at k=8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19104 
// Full 3x4 tile with a_zero_point set to 0; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19120 
// Full 3x4 tile with b_zero_point set to 0; k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19136 
// Full 3x4 tile with both a_zero_point and b_zero_point set to 0;
// k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
19153 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19154 
19155 
19156 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile at k=8.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19169 
// Full 4x4 tile at k=8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
19183 
// k_eq_8_subtile: k = 8 for every (m, n) pair with m in 1..4 and n in 1..4,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19201 
// k_eq_8_subtile_m: k = 8, n = 4, with m swept over 1..4 and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19217 
// k_eq_8_subtile_n: k = 8, m = 4, with n swept over 1..4 and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19233 
// k_lt_8: m = 4, n = 4, with k swept over 1..7 (every value below 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19248 
// k_lt_8_subtile: k in 1..7 crossed with every (m, n) pair in 1..4 x 1..4,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19268 
// k_gt_8: m = 4, n = 4, with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19283 
// k_gt_8_subtile: k in 9..15 crossed with every (m, n) pair in 1..4 x 1..4,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19303 
// k_div_8: m = 4, n = 4, with k taking the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19318 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with every (m, n) pair in
// 1..4 x 1..4, each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19338 
// n_gt_4: m = 4, with n swept over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19355 
// n_gt_4_strided_cn: m = 4, n in 5..7, k in 1, 10, 19, 28, 37, with
// cn_stride(7) set on every run.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19373 
// n_gt_4_subtile: n in 5..7, k in 1, 10, 19, 28, 37 and m in 1..4,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19393 
// n_div_4: m = 4, with n = 8 and 12 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19410 
// n_div_4_strided_cn: m = 4, n = 8 and 12, k in 1, 10, 19, 28, 37, with
// cn_stride(7) set on every run.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19428 
// n_div_4_subtile: n = 8 and 12, k in 1, 10, 19, 28, 37 and m in 1..4,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19448 
// small_kernel: m = 4, n = 4, k in 1, 10, 19, 28, 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19464 
// small_kernel_subtile: ks(3) with k in 1, 10, 19, 28, 37 crossed with every
// (m, n) pair in 1..4 x 1..4, each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19485 
// n_gt_4_small_kernel: m = 4, n in 5..7, k in 1, 10, 19, 28, 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19503 
// n_div_4_small_kernel: m = 4, n = 8 and 12, k in 1, 10, 19, 28, 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19521 
// strided_cm_subtile: cm_stride(7) with k in 1, 10, 19, 28, 37 crossed with
// every (m, n) pair in 1..4 x 1..4, each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19542 
// a_offset: m = 4, n = 4, k in 1, 10, 19, 28, 37, with ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19559 
// zero: same ks(3) / a_offset(163) setup as a_offset, additionally passing
// zero_index(mz) for each mz in 0..3, over k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19579 
// qmin: single m = 4, n = 4, k = 8 run with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
19593 
// qmax: single m = 4, n = 4, k = 8 run with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
19607 
// strided_cm: single m = 4, n = 4, k = 8 run with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
19621 
// no_a_zero_point: m = 4, n = 4, k in 1, 10, 19, 28, 37, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19637 
// no_b_zero_point: m = 4, n = 4, k in 1, 10, 19, 28, 37, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19653 
// no_zero_point: m = 4, n = 4, k in 1, 10, 19, 28, 37, with both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19670 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19671 
19672 
19673 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 3x4c8 SSE2 ld64 kernel with m = 3, n = 4, k = 8
// (tester configured with mr = 3, nr = 4, kr = 8, sr = 1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
19686 
// strided_cn: single m = 3, n = 4, k = 8 run with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
19700 
// k_eq_8_subtile: k = 8 for every (m, n) pair with m in 1..3 and n in 1..4,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19718 
// k_eq_8_subtile_m: k = 8, n = 4, with m swept over 1..3 and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19734 
// k_eq_8_subtile_n: k = 8, m = 3, with n swept over 1..4 and iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19750 
// k_lt_8: m = 3, n = 4, with k swept over 1..7 (every value below 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19765 
// k_lt_8_subtile: k in 1..7 crossed with every (m, n) pair in 1..3 x 1..4,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19785 
// k_gt_8: m = 3, n = 4, with k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19800 
// k_gt_8_subtile: k in 9..15 crossed with every (m, n) pair in 1..3 x 1..4,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19820 
// k_div_8: m = 3, n = 4, with k taking the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19835 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with every (m, n) pair in
// 1..3 x 1..4, each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19855 
// n_gt_4: m = 3, with n swept over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19872 
// n_gt_4_strided_cn: m = 3, n in 5..7, k in 1, 10, 19, 28, 37, with
// cn_stride(7) set on every run.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19890 
// n_gt_4_subtile: n in 5..7, k in 1, 10, 19, 28, 37 and m in 1..3,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19910 
// n_div_4: m = 3, with n = 8 and 12 and k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19927 
// n_div_4_strided_cn: m = 3, n = 8 and 12, k in 1, 10, 19, 28, 37, with
// cn_stride(7) set on every run.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
19945 
// n_div_4_subtile: n = 8 and 12, k in 1, 10, 19, 28, 37 and m in 1..3,
// each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
19965 
// small_kernel: m = 3, n = 4, k in 1, 10, 19, 28, 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
19981 
// small_kernel_subtile: ks(3) with k in 1, 10, 19, 28, 37 crossed with every
// (m, n) pair in 1..3 x 1..4, each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20002 
// n_gt_4_small_kernel: m = 3, n in 5..7, k in 1, 10, 19, 28, 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20020 
// n_div_4_small_kernel: m = 3, n = 8 and 12, k in 1, 10, 19, 28, 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20038 
// strided_cm_subtile: cm_stride(7) with k in 1, 10, 19, 28, 37 crossed with
// every (m, n) pair in 1..3 x 1..4, each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20059 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with ks(3) and a_offset(127).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20076 
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..2 on a 3x4 problem with
// ks(3) and a_offset(127).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20096 
// Single run on a 3x4 problem with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20110 
// Single run on a 3x4 problem with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20124 
// Single run on a 3x4 problem with k = 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20138 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20154 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20170 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with both a_zero_point(0)
// and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20187 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20188 
20189 
20190 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a 3x4 problem with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20203 
// Single run on a 3x4 problem with k = 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20217 
// Sweeps n = 1..4 and m = 1..3 with k = 8; each combination is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20235 
// Sweeps m = 1..3 with n = 4 and k = 8; each run uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20251 
// Sweeps n = 1..4 with m = 3 and k = 8; each run uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20267 
// Sweeps k = 1..7 on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20282 
// Sweeps k = 1..7, n = 1..4 and m = 1..3; each combination is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20302 
// Sweeps k = 9..15 on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20317 
// Sweeps k = 9..15, n = 1..4 and m = 1..3; each combination is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20337 
// Sweeps k = 16, 24, ..., 80 (step 8) on a 3x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20352 
// Sweeps k = 16, 24, ..., 80 (step 8), n = 1..4 and m = 1..3; each
// combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20372 
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20389 
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 3 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20407 
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..3; each combination is
// run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20427 
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20444 
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 3 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20462 
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..3; each combination is
// run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20482 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20498 
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3 with ks(3); each
// combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20519 
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 3 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20537 
// Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m fixed at 3 and ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20555 
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3 with cm_stride(7);
// each combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20576 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with ks(3) and a_offset(127).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20593 
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..2 on a 3x4 problem with
// ks(3) and a_offset(127).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20613 
// Single run on a 3x4 problem with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20627 
// Single run on a 3x4 problem with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20641 
// Single run on a 3x4 problem with k = 8 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20655 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20671 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20687 
// Sweeps k = 1, 10, 19, 28, 37 on a 3x4 problem with both a_zero_point(0)
// and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20704 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20705 
20706 
20707 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a 1x4 problem with k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20720 
// Single run on a 1x4 problem with k = 8 and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
20734 
// Sweeps n = 1..4 with k = 8; the m loop covers only the value 1. Each run
// uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20752 
// Runs n = 4, k = 8 for every m in the loop, which covers only the value 1;
// the run uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20768 
// Sweeps n = 1..4 with m = 1 and k = 8; each run uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20784 
// Sweeps k = 1..7 on a 1x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20799 
// Sweeps k = 1..7 and n = 1..4; the m loop covers only the value 1. Each
// combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20819 
// Sweeps k = 9..15 on a 1x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20834 
// Sweeps k = 9..15 and n = 1..4; the m loop covers only the value 1. Each
// combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20854 
// Sweeps k = 16, 24, ..., 80 (step 8) on a 1x4 problem.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
20869 
// Sweeps k = 16, 24, ..., 80 (step 8) and n = 1..4; the m loop covers only
// the value 1. Each combination is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
20889 
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20906 
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m fixed at 1 and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
20924 
// Runs n in 5..7 and k in 1, 10, ..., 37 for m = 1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20944 
// Runs n in {8, 12} (multiples of nr = 4) against k in 1, 10, ..., 37 with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20961 
// Same sweep as n_div_4 (n in {8, 12}, k in 1, 10, ..., 37) but with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
20979 
// Runs n in {8, 12} and k in 1, 10, ..., 37 for m = 1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
20999 
// Runs m = 1, n = 4 with ks set to 3 for each k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21015 
// With ks set to 3 and iterations set to 1, runs k in 1, 10, ..., 37 against
// n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21036 
// Runs n in 5..7 against k in 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21054 
// Runs n in {8, 12} against k in 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21072 
// With cm_stride set to 7 and iterations set to 1, runs k in 1, 10, ..., 37
// against n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21093 
// Runs m = 1, n = 4 with ks set to 3 and a_offset set to 43 for each k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21110 
// Same configuration as a_offset (ks = 3, a_offset = 43), additionally setting
// zero_index to each value in [0, 1) for every k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21130 
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21144 
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21158 
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21172 
// Runs m = 1, n = 4 with a_zero_point set to 0 for each k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21188 
// Runs m = 1, n = 4 with b_zero_point set to 0 for each k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21204 
// Runs m = 1, n = 4 with both a_zero_point and b_zero_point set to 0 for each
// k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21221 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21222 
21223 
21224 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21237 
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21251 
// With k = 8 and iterations set to 1, runs every (m, n) with n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21269 
// With n = 4, k = 8 and iterations set to 1, runs each m in 1..1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21285 
// With m = 1, k = 8 and iterations set to 1, runs each n in 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21301 
// Runs m = 1, n = 4 for each k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21316 
// For each k in 1..7, runs every (m, n) with n in 1..4 and m = 1,
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21336 
// Runs m = 1, n = 4 for each k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21351 
// For each k in 9..15, runs every (m, n) with n in 1..4 and m = 1,
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21371 
// Runs the kernel with m = 1, n = 4 for each k in 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21386 
// For each k in 16, 24, ..., 80, runs every (m, n) with n in 1..4 and m = 1,
// with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21406 
// Runs n in 5..7 (above nr = 4) against k in 1, 10, 19, 28, 37 with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21423 
// Same sweep as n_gt_4 (n in 5..7, k in 1, 10, ..., 37) but with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21441 
// Runs n in 5..7 and k in 1, 10, ..., 37 for m = 1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21461 
// Runs n in {8, 12} (multiples of nr = 4) against k in 1, 10, ..., 37 with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21478 
// Same sweep as n_div_4 (n in {8, 12}, k in 1, 10, ..., 37) but with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21496 
// Runs n in {8, 12} and k in 1, 10, ..., 37 for m = 1, with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21516 
// Runs m = 1, n = 4 with ks set to 3 for each k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21532 
// With ks set to 3 and iterations set to 1, runs k in 1, 10, ..., 37 against
// n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21553 
// Runs n in 5..7 against k in 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21571 
// Runs n in {8, 12} against k in 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21589 
// With cm_stride set to 7 and iterations set to 1, runs k in 1, 10, ..., 37
// against n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
                xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
21610 
// Runs m = 1, n = 4 with ks set to 3 and a_offset set to 43 for each k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21627 
// Same configuration as a_offset (ks = 3, a_offset = 43), additionally setting
// zero_index to each value in [0, 1) for every k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21647 
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21661 
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21675 
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21689 
// Runs m = 1, n = 4 with a_zero_point set to 0 for each k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21705 
// Runs m = 1, n = 4 with b_zero_point set to 0 for each k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21721 
// Runs m = 1, n = 4 with both a_zero_point and b_zero_point set to 0 for each
// k in 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
21738 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21739 
21740 
21741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21754 
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
21768 
// With k = 8 and iterations set to 1, runs every (m, n) with n in 1..4 and m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
21786 
// k == 8 and n == 4 while m is swept over 1..1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21802 
// k == 8 and m == 1 while n is swept over 1..4, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21818 
// Full 1x4 tile with k swept over 1..7, i.e. every k below kr == 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21833 
// k in 1..7 combined with every (n, m) pair, 1 <= n <= 4 and 1 <= m <= 1,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21853 
// Full 1x4 tile with k swept over 9..15 (strictly between 8 and 16).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21868 
// k in 9..15 combined with every (n, m) pair, 1 <= n <= 4 and 1 <= m <= 1,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21888 
// Full 1x4 tile with k stepping through the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
21903 
// k in {16, 24, ..., 80} combined with every (n, m) pair, 1 <= n <= 4 and
// 1 <= m <= 1, using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21923 
// n in 5..7 (above nr == 4) with m == 1 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21940 
// n in 5..7 with m == 1, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21958 
// n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..1,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
21978 
// n in {8, 12} (multiples of nr == 4) with m == 1 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
21995 
// n in {8, 12} with m == 1, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22013 
// n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..1,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22033 
// Full 1x4 tile with ks set to 3 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22049 
// ks set to 3 with k in {1, 10, 19, 28, 37} and every (n, m) pair,
// 1 <= n <= 4 and 1 <= m <= 1, using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22070 
// n in 5..7 with m == 1, k in {1, 10, 19, 28, 37} and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22088 
// n in {8, 12} with m == 1, k in {1, 10, 19, 28, 37} and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22106 
// cm_stride set to 7 with k in {1, 10, 19, 28, 37} and every (n, m) pair,
// 1 <= n <= 4 and 1 <= m <= 1, using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22127 
// Full 1x4 tile with ks set to 3, a_offset set to 43 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22144 
// Same setup as the a_offset case (ks 3, a_offset 43), additionally passing
// each index in 0..0 to zero_index, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22164 
// Full 1x4 tile at k == 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22178 
// Full 1x4 tile at k == 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22192 
// Full 1x4 tile at k == 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22206 
// Full 1x4 tile with a_zero_point set to 0 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22222 
// Full 1x4 tile with b_zero_point set to 0 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22238 
// Full 1x4 tile with both a_zero_point and b_zero_point set to 0 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22255 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22256 
22257 
22258 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile (m == mr, n == nr) with k == 8, the same value as kr.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22271 
// Full 2x4 tile at k == 8 with cn_stride explicitly set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
22285 
// k == 8 for every (n, m) pair with 1 <= n <= 4 and 1 <= m <= 2,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22303 
// k == 8 and n == 4 while m is swept over 1..2, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22319 
// k == 8 and m == 2 while n is swept over 1..4, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22335 
// Full 2x4 tile with k swept over 1..7, i.e. every k below kr == 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22350 
// k in 1..7 combined with every (n, m) pair, 1 <= n <= 4 and 1 <= m <= 2,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22370 
// Full 2x4 tile with k swept over 9..15 (strictly between 8 and 16).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22385 
// k in 9..15 combined with every (n, m) pair, 1 <= n <= 4 and 1 <= m <= 2,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22405 
// Full 2x4 tile with k stepping through the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22420 
// k in {16, 24, ..., 80} combined with every (n, m) pair, 1 <= n <= 4 and
// 1 <= m <= 2, using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22440 
// n in 5..7 (above nr == 4) with m == 2 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22457 
// n in 5..7 with m == 2, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22475 
// n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..2,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22495 
// n in {8, 12} (multiples of nr == 4) with m == 2 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22512 
// n in {8, 12} with m == 2, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22530 
// n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..2,
// using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22550 
// Full 2x4 tile with ks set to 3 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
22566 
// ks set to 3 with k in {1, 10, 19, 28, 37} and every (n, m) pair,
// 1 <= n <= 4 and 1 <= m <= 2, using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22587 
// n in 5..7 with m == 2, k in {1, 10, 19, 28, 37} and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22605 
// n in {8, 12} with m == 2, k in {1, 10, 19, 28, 37} and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
22623 
// cm_stride set to 7 with k in {1, 10, 19, 28, 37} and every (n, m) pair,
// 1 <= n <= 4 and 1 <= m <= 2, using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
22644 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, a_offset) {
  // Full 2x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(83).
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22661 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, zero) {
  // Full 2x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(83),
  // trying zero_index values 0 and 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
22681 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
  // Full 2x4 tile at k = 8 with qmin(128).
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
22695 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
  // Full 2x4 tile at k = 8 with qmax(128).
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
22709 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
  // Full 2x4 tile at k = 8 with cm_stride(7).
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
22723 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_a_zero_point) {
  // Full 2x4 tile for k = 1, 10, 19, 28, 37 with a_zero_point(0).
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22739 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_b_zero_point) {
  // Full 2x4 tile for k = 1, 10, 19, 28, 37 with b_zero_point(0).
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22755 
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_zero_point) {
  // Full 2x4 tile for k = 1, 10, 19, 28, 37 with both a_zero_point(0) and
  // b_zero_point(0).
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22772 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22773 
22774 
22775 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
  // Full 3x4 tile (m = 3, n = 4) at k = 8.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
22788 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
  // Full 3x4 tile at k = 8 with cn_stride(7).
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
22802 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
  // Every n in 1..4 paired with every m in 1..3 at k = 8, iterations(1) each.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
22820 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  // m swept over 1..3 with n = 4 and k = 8, iterations(1) each.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22836 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  // n swept over 1..4 with m = 3 and k = 8, iterations(1) each.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22852 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
  // Full 3x4 tile for every k in 1..7.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22867 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
  // Every k in 1..7, n in 1..4 and m in 1..3, iterations(1) each.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
22887 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
  // Full 3x4 tile for every k in 9..15.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22902 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
  // Every k in 9..15, n in 1..4 and m in 1..3, iterations(1) each.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
22922 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
  // Full 3x4 tile for k = 16, 24, ..., 80 (step 8).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
22937 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..3, iterations(1) each.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
22957 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m fixed at 3.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
22974 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m fixed at 3, cn_stride(7).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
22992 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
  // n = 5..7, k = 1, 10, 19, 28, 37 and m in 1..3, iterations(1) each.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23012 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m fixed at 3.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23029 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m fixed at 3, cn_stride(7).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23047 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
  // n = 8 and 12, k = 1, 10, 19, 28, 37 and m in 1..3, iterations(1) each.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23067 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel) {
  // Full 3x4 tile for k = 1, 10, 19, 28, 37 with ks(3).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23083 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel_subtile) {
  // k = 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..3, with ks(3)
  // and iterations(1) per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23104 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m fixed at 3 and ks(3).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23122 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m fixed at 3 and ks(3).
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23140 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
  // k = 1, 10, 19, 28, 37; every n in 1..4 and m in 1..3, with cm_stride(7)
  // and iterations(1) per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23161 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, a_offset) {
  // Full 3x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(127).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23178 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, zero) {
  // Full 3x4 tile for k = 1, 10, 19, 28, 37 with ks(3) and a_offset(127),
  // trying zero_index values 0, 1 and 2.
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23198 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
  // Full 3x4 tile at k = 8 with qmin(128).
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23212 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
  // Full 3x4 tile at k = 8 with qmax(128).
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23226 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
  // Full 3x4 tile at k = 8 with cm_stride(7).
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23240 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_a_zero_point) {
  // Full 3x4 tile for k = 1, 10, 19, 28, 37 with a_zero_point(0).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23256 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_b_zero_point) {
  // Full 3x4 tile for k = 1, 10, 19, 28, 37 with b_zero_point(0).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23272 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_zero_point) {
  // Full 3x4 tile for k = 1, 10, 19, 28, 37 with both a_zero_point(0) and
  // b_zero_point(0).
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23289 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23290 
23291 
23292 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8) {
  // Full 1x4 tile (m = 1, n = 4) at k = 8.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23305 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cn) {
  // Full 1x4 tile at k = 8 with cn_stride(7).
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
}
23319 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile) {
  // Every n in 1..4 at k = 8; the m loop covers only m = 1. iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23337 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
  // m loop covers only m = 1, with n = 4 and k = 8; iterations(1).
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23353 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
  // n swept over 1..4 with m = 1 and k = 8, iterations(1) each.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23369 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8) {
  // Full 1x4 tile for every k in 1..7.
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23384 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_subtile) {
  // Every k in 1..7 and n in 1..4; the m loop covers only m = 1.
  // iterations(1) per configuration.
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23404 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8) {
  // Full 1x4 tile for every k in 9..15.
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23419 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_subtile) {
  // Every k in 9..15 and n in 1..4; the m loop covers only m = 1.
  // iterations(1) per configuration.
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23439 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8) {
  // Full 1x4 tile for k = 16, 24, ..., 80 (step 8).
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
  }
}
23454 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with n in 1..4; the m loop covers only m = 1.
  // iterations(1) per configuration.
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
23474 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m fixed at 1.
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23491 
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
  // n = 5..7 crossed with k = 1, 10, 19, 28, 37; m fixed at 1, cn_stride(7).
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
23509 
// n_gt_4_subtile: n = 5..7, k in {1, 10, 19, 28, 37}, and every m in 1..mr
// (only m = 1 here), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23529 
// n_div_4: output widths that are multiples of nr = 4 (n = 8 and 12) with
// m = 1, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23546 
// n_div_4_strided_cn: same sweep as n_div_4 (n = 8 and 12,
// k in {1, 10, 19, 28, 37}) but with cn_stride set to 7 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23564 
// n_div_4_subtile: n = 8 and 12, k in {1, 10, 19, 28, 37}, and every m in
// 1..mr (only m = 1 here), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23584 
// small_kernel: full 1x4 tile with ks(3) set on the tester, for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23600 
// small_kernel_subtile: ks(3) combined with every sub-tile (n = 1..4, m = 1)
// for k in {1, 10, 19, 28, 37}, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23621 
// n_gt_4_small_kernel: n = 5..7 with ks(3) set on the tester, m = 1,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23639 
// n_div_4_small_kernel: n = 8 and 12 with ks(3) set on the tester, m = 1,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23657 
// strided_cm_subtile: cm_stride(7) combined with every sub-tile
// (n = 1..4, m = 1) for k in {1, 10, 19, 28, 37}, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23678 
// a_offset: full 1x4 tile with ks(3) and a_offset(43) set on the tester,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23695 
// zero: same setup as a_offset (ks(3), a_offset(43)) and additionally sets
// zero_index(mz) for every mz in 0..mr-1 (only mz = 0 here),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23715 
// qmin: full 1x4 tile at k = 8 with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23729 
// qmax: full 1x4 tile at k = 8 with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23743 
// strided_cm: full 1x4 tile at k = 8 with cm_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23757 
// no_a_zero_point: full 1x4 tile with a_zero_point(0) set on the tester,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23773 
// no_b_zero_point: full 1x4 tile with b_zero_point(0) set on the tester,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23789 
// no_zero_point: full 1x4 tile with both a_zero_point(0) and b_zero_point(0)
// set on the tester, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23806 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23807 
23808 
23809 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 2x4 tile with k equal to kr = 8, run through
// GemmMicrokernelTester against the 2x4c8 XOP LD128 IGEMM microkernel.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23822 
// strided_cn: full 2x4 tile at k = 8 with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
23836 
// k_eq_8_subtile: k = 8 with every sub-tile (n = 1..4, m = 1..2),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
23854 
// k_eq_8_subtile_m: k = 8, n fixed at 4, m varied over 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23870 
// k_eq_8_subtile_n: k = 8, m fixed at 2, n varied over 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23886 
// k_lt_8: full 2x4 tile for every k below kr = 8 (k = 1..7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23901 
// k_lt_8_subtile: k = 1..7 combined with every sub-tile (n = 1..4, m = 1..2),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23921 
// k_gt_8: full 2x4 tile for every k strictly between 8 and 16 (k = 9..15).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23936 
// k_gt_8_subtile: k = 9..15 combined with every sub-tile (n = 1..4, m = 1..2),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23956 
// k_div_8: full 2x4 tile for k in {16, 24, ..., 80} (multiples of kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
23971 
// k_div_8_subtile: k in {16, 24, ..., 80} combined with every sub-tile
// (n = 1..4, m = 1..2), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
23991 
// n_gt_4: output widths n = 5..7 (larger than nr = 4) with m = 2, for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24008 
// n_gt_4_strided_cn: same sweep as n_gt_4 (n = 5..7, k in {1, 10, 19, 28, 37})
// but with cn_stride set to 7 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24026 
// n_gt_4_subtile: n = 5..7, k in {1, 10, 19, 28, 37}, and m = 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24046 
// n_div_4: output widths that are multiples of nr = 4 (n = 8 and 12) with
// m = 2, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24063 
// n_div_4_strided_cn: same sweep as n_div_4 (n = 8 and 12,
// k in {1, 10, 19, 28, 37}) but with cn_stride set to 7 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24081 
// n_div_4_subtile: n = 8 and 12, k in {1, 10, 19, 28, 37}, and m = 1..2,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24101 
// small_kernel: full 2x4 tile with ks(3) set on the tester, for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24117 
// small_kernel_subtile: ks(3) combined with every sub-tile
// (n = 1..4, m = 1..2) for k in {1, 10, 19, 28, 37}, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24138 
// n_gt_4_small_kernel: n = 5..7 with ks(3) set on the tester, m = 2,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24156 
// n_div_4_small_kernel: n = 8 and 12 with ks(3) set on the tester, m = 2,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24174 
// strided_cm_subtile: cm_stride(7) combined with every sub-tile
// (n = 1..4, m = 1..2) for k in {1, 10, 19, 28, 37}, one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
24195 
// a_offset: full 2x4 tile with ks(3) and a_offset(83) set on the tester,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24212 
// zero: same setup as a_offset (ks(3), a_offset(83)) and additionally sets
// zero_index(mz) for every mz in 0..mr-1 (mz = 0 and 1),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24232 
// qmin: full 2x4 tile at k = 8 with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
24246 
// qmax: full 2x4 tile at k = 8 with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
24260 
// strided_cm: full 2x4 tile at k = 8 with cm_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
24274 
// no_a_zero_point: full 2x4 tile with a_zero_point(0) set on the tester,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24290 
// no_b_zero_point: full 2x4 tile with b_zero_point(0) set on the tester,
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24306 
// no_zero_point: full 2x4 tile with both a_zero_point(0) and b_zero_point(0)
// set on the tester, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24323 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24324 
24325 
24326 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 3x4 tile with k equal to kr = 8, run through
// GemmMicrokernelTester against the 3x4c8 AVX LD128 IGEMM microkernel.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
24339 
// strided_cn: full 3x4 tile at k = 8 with cn_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
24353 
// k_eq_8_subtile: k = 8 with every sub-tile (n = 1..4, m = 1..3),
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
24371 
// k_eq_8_subtile_m: k = 8, n fixed at 4, m varied over 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
24387 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  // m=3 and k=8 held fixed while n walks 1..4, one iteration each.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24403 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4 with every k strictly below 8 (1..7).
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24418 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in 1..7, n in 1..4, m in 1..3 (nested in that order),
  // one iteration per combination.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24438 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4 with every k from 9 up to (but excluding) 16.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24453 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in 9..15, n in 1..4, m in 1..3 (nested in that order),
  // one iteration per combination.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24473 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4 with k stepping through 16, 24, ..., 80 (multiples of 8).
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24488 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k in {16, 24, ..., 80}, n in 1..4, m in 1..3 (nested in that order),
  // one iteration per combination.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24508 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  // m=3 with n in 5..7 (above nr=4); k takes 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24525 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // m=3 with n in 5..7 and k in {1, 10, 19, 28, 37}; cn_stride set to 7.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24543 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..3 (nested in that order),
  // one iteration per combination.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24563 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  // m=3 with n in {8, 12} (multiples of nr=4); k takes 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24580 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}; cn_stride set to 7.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24598 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..3 (nested in that order),
  // one iteration per combination.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24618 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4, ks=3 with k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24634 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  // ks=3 with k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3
  // (nested in that order), one iteration per combination.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24655 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // m=3, ks=3 with n in 5..7 and k in {1, 10, 19, 28, 37}.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24673 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // m=3, ks=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24691 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // cm_stride=7 with k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..3
  // (nested in that order), one iteration per combination.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                  xnn_init_qu8_conv_minmax_fp32_sse2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24712 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4, ks=3, a_offset=127 with k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24729 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4, ks=3, a_offset=127; for each k in {1, 10, 19, 28, 37}
  // zero_index is set to 0, 1 and 2 in turn.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_val)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24749 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4, k=8 with qmin set to 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24763 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4, k=8 with qmax set to 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24777 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4, k=8 with cm_stride set to 7.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
24791 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4 with a_zero_point forced to 0; k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24807 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4 with b_zero_point forced to 0; k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24823 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // m=3, n=4 with both a_zero_point and b_zero_point forced to 0;
  // k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
24840 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24841 
24842 
24843 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with m, n and k equal to mr (2), nr (8) and kr (8).
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
}
24856 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, n=8, k=8 with cn_stride set to 11.
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qu8_conv_minmax_fp32_avx2_params,
            xnn_qu8_requantize_fp32);
}
24870 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k fixed at 8; every n in [1, 8] paired with every m in [1, 2],
  // one iteration per combination.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
24888 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  // n=8 and k=8 held fixed while m walks 1..2, one iteration each.
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(m_val).n(8).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
24904 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  // m=2 and k=8 held fixed while n walks 1..8, one iteration each.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
24920 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, n=8 with every k strictly below 8 (1..7).
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
24935 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in 1..7, n in 1..8, m in 1..2 (nested in that order),
  // one iteration per combination.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24955 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, n=8 with every k from 9 up to (but excluding) 16.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
24970 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in 9..15, n in 1..8, m in 1..2 (nested in that order),
  // one iteration per combination.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
24990 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, n=8 with k stepping through 16, 24, ..., 80 (multiples of 8).
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25005 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // k in {16, 24, ..., 80}, n in 1..8, m in 1..2 (nested in that order),
  // one iteration per combination.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25025 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // m=2 with n in 9..15 (above nr=8); k takes 1, 10, 19, 28, 37.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25042 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // m=2 with n in 9..15 and k in {1, 10, 19, 28, 37}; cn_stride set to 11.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25060 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..2 (nested in that order),
  // one iteration per combination.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25080 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // m=2 with n in {16, 24} (multiples of nr=8); k takes 1, 10, 19, 28, 37.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25097 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  // m=2 with n in {16, 24} and k in {1, 10, 19, 28, 37}; cn_stride set to 11.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25115 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..2 (nested in that
  // order), one iteration per combination.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25135 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, n=8, ks=3 with k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25151 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // ks=3 with k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..2
  // (nested in that order), one iteration per combination.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25172 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, ks=3 with n in 9..15 and k in {1, 10, 19, 28, 37}.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25190 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, ks=3 with n in {16, 24} and k in {1, 10, 19, 28, 37}.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25208 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  // cm_stride=11 with k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..2
  // (nested in that order), one iteration per combination.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                  xnn_init_qu8_conv_minmax_fp32_avx2_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
25229 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, n=8, ks=3, a_offset=83 with k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
25246 
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  // m=2, n=8, ks=3, a_offset=83; for each k in {1, 10, 19, 28, 37}
  // zero_index is set to 0 and 1 in turn.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(8).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
25266 
// 2x8c8 AVX2 kernel on a full 2x8 tile with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25280 
// 2x8c8 AVX2 kernel on a full 2x8 tile with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25294 
// 2x8c8 AVX2 kernel on a full 2x8 tile with k = 8 and cm_stride(11).
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25308 
// 2x8c8 AVX2 kernel on a full 2x8 tile with a_zero_point(0),
// sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25324 
// 2x8c8 AVX2 kernel on a full 2x8 tile with b_zero_point(0),
// sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25340 
// 2x8c8 AVX2 kernel on a full 2x8 tile with both a_zero_point(0) and
// b_zero_point(0), sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25357 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25358 
25359 
25360 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x8c8 AVX2 kernel on a full 3x8 tile with k = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(3)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25373 
// 3x8c8 AVX2 kernel on a full 3x8 tile with k = 8 and cn_stride(11).
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(3)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25387 
// 3x8c8 AVX2 kernel with k = 8: every (m, n) with m = 1..3 and n = 1..8,
// one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25405 
// 3x8c8 AVX2 kernel with k = 8 and n = 8: varies only m = 1..3,
// one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25421 
// 3x8c8 AVX2 kernel with k = 8 and m = 3: varies only n = 1..8,
// one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25437 
// 3x8c8 AVX2 kernel on a full 3x8 tile for every k below kr: k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25452 
// 3x8c8 AVX2 kernel for k = 1..7 crossed with n = 1..8 and m = 1..3,
// one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25472 
// 3x8c8 AVX2 kernel on a full 3x8 tile for k strictly between kr and 2*kr:
// k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25487 
// 3x8c8 AVX2 kernel for k = 9..15 crossed with n = 1..8 and m = 1..3,
// one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25507 
// 3x8c8 AVX2 kernel on a full 3x8 tile for k a multiple of 8:
// k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25522 
// 3x8c8 AVX2 kernel for k = 16, 24, ..., 80 crossed with n = 1..8 and
// m = 1..3, one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25542 
// 3x8c8 AVX2 kernel with m = 3 for n strictly between nr and 2*nr
// (n = 9..15), each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25559 
// 3x8c8 AVX2 kernel with m = 3 and cn_stride(11) for n = 9..15,
// each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25577 
// 3x8c8 AVX2 kernel for n = 9..15 crossed with k = 1, 10, 19, 28, 37 and
// m = 1..3, one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25597 
// 3x8c8 AVX2 kernel with m = 3 for n a multiple of 8 (n = 16, 24),
// each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25614 
// 3x8c8 AVX2 kernel with m = 3 and cn_stride(11) for n = 16, 24,
// each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25632 
// 3x8c8 AVX2 kernel for n = 16, 24 crossed with k = 1, 10, 19, 28, 37 and
// m = 1..3, one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25652 
// 3x8c8 AVX2 kernel on a full 3x8 tile with ks(3),
// sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25668 
// 3x8c8 AVX2 kernel with ks(3): sweeps k = 1, 10, 19, 28, 37, n = 1..8 and
// m = 1..3, one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25689 
// 3x8c8 AVX2 kernel with m = 3 and ks(3) for n = 9..15,
// each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25707 
// 3x8c8 AVX2 kernel with m = 3 and ks(3) for n = 16, 24,
// each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25725 
// 3x8c8 AVX2 kernel with cm_stride(11): sweeps k = 1, 10, 19, 28, 37,
// n = 1..8 and m = 1..3, one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25746 
// 3x8c8 AVX2 kernel on a full 3x8 tile with ks(3) and a_offset(127),
// sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25763 
// 3x8c8 AVX2 kernel with ks(3) and a_offset(127): for each k in
// 1, 10, 19, 28, 37 the test is repeated with zero_index(mz) for mz = 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(3)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
25783 
// 3x8c8 AVX2 kernel on a full 3x8 tile with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(3)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25797 
// 3x8c8 AVX2 kernel on a full 3x8 tile with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(3)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25811 
// 3x8c8 AVX2 kernel on a full 3x8 tile with k = 8 and cm_stride(11).
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(3)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
25825 
// 3x8c8 AVX2 kernel on a full 3x8 tile with a_zero_point(0),
// sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25841 
// 3x8c8 AVX2 kernel on a full 3x8 tile with b_zero_point(0),
// sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25857 
// 3x8c8 AVX2 kernel on a full 3x8 tile with both a_zero_point(0) and
// b_zero_point(0), sweeping k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
25874 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25875 
25876 
25877 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x16c8 AVX512SKX kernel on a full 3x16 tile with k = 8 (equal to kr).
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
}
25890 
// 3x16c8 AVX512SKX kernel on a full 3x16 tile with k = 8 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
}
25904 
// 3x16c8 AVX512SKX kernel with k = 8: every (m, n) with m = 1..3 and
// n = 1..16, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
    }
  }
}
25922 
// 3x16c8 AVX512SKX kernel with k = 8 and n = 16: varies only m = 1..3,
// one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
25938 
// 3x16c8 AVX512SKX kernel with k = 8 and m = 3: varies only n = 1..16,
// one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
25954 
// 3x16c8 AVX512SKX kernel on a full 3x16 tile for every k below kr:
// k = 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
25969 
// 3x16c8 AVX512SKX kernel for k = 1..7 crossed with n = 1..16 and m = 1..3,
// one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
25989 
// 3x16c8 AVX512SKX kernel on a full 3x16 tile for k strictly between kr and
// 2*kr: k = 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
26004 
// 3x16c8 AVX512SKX kernel for k = 9..15 crossed with n = 1..16 and
// m = 1..3, one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26024 
// 3x16c8 AVX512SKX kernel on a full 3x16 tile for k a multiple of 8:
// k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
26039 
// 3x16c8 AVX512SKX kernel for k = 16, 24, ..., 80 crossed with n = 1..16 and
// m = 1..3, one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26059 
// 3x16c8 AVX512SKX kernel with m = 3 for n strictly between nr and 2*nr
// (n = 17..31), each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
    }
  }
}
26076 
// 3x16c8 AVX512SKX kernel with m = 3 and cn_stride(19) for n = 17..31,
// each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
    }
  }
}
26094 
// 3x16c8 AVX512SKX kernel for n = 17..31 crossed with k = 1, 10, 19, 28, 37
// and m = 1..3, one iteration per (m, n, k) shape.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26114 
// 3x16c8 AVX512SKX kernel with m = 3 for n a multiple of 16 (n = 32, 48),
// each crossed with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
    }
  }
}
26131 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m = 3
// and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
    }
  }
}
26149 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Sweeps n over {32, 48}, k over {1, 10, 19, 28, 37} and m over 1..3,
// running a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26169 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Full 3x16 tile with ks set to 3, sweeping k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
26185 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// With ks set to 3, sweeps k over {1, 10, 19, 28, 37}, n over 1..16 and
// m over 1..3, running a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26206 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// With ks set to 3 and m = 3, sweeps n over 17..31 and k over
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
    }
  }
}
26224 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// With ks set to 3 and m = 3, sweeps n over {32, 48} and k over
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
    }
  }
}
26242 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// With cm_stride set to 19, sweeps k over {1, 10, 19, 28, 37}, n over 1..16
// and m over 1..3, running a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26263 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Full 3x16 tile with ks set to 3 and a_offset set to 127, sweeping k over
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
26280 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Full 3x16 tile with ks set to 3 and a_offset set to 127; sweeps k over
// {1, 10, 19, 28, 37} and zero_index over 0..2 (one value per row of mr = 3).
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
    }
  }
}
26300 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Single full 3x16 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
}
26314 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Single full 3x16 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
}
26328 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Single full 3x16 tile at k = 8 with cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
}
26342 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Full 3x16 tile with a_zero_point set to 0, sweeping k over
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
26358 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Full 3x16 tile with b_zero_point set to 0, sweeping k over
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
26374 
// 3x16c8 AVX512SKX kernel, gated by TEST_REQUIRES_X86_AVX512SKX.
// Full 3x16 tile with both a_zero_point and b_zero_point set to 0,
// sweeping k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
26391 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26392 
26393 
26394 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: single full 1x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26406 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: single full 1x4 tile at k = 8
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26419 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel at k = 8: sweeps n over 1..4 and
// m over 1..1 (mr = 1, so the m loop runs once), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26436 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel at k = 8 and n = 4: sweeps m over
// 1..1 (mr = 1, so the loop runs once), one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26451 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel at k = 8 and m = 1: sweeps n over
// 1..4, one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26466 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile, sweeping k over 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26480 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: sweeps k over 1..7, n over 1..4
// and m over 1..1 (runs once), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26499 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile, sweeping k over 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26513 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: sweeps k over 9..15, n over 1..4
// and m over 1..1 (runs once), one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26532 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile, sweeping k over
// the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26546 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: sweeps k over the multiples of 8
// from 16 to 80, n over 1..4 and m over 1..1 (runs once), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26565 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel with m = 1: sweeps n over 5..7
// (above nr = 4) and k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26581 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel with m = 1 and cn_stride set to 7:
// sweeps n over 5..7 and k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26598 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: sweeps n over 5..7, k over
// {1, 10, 19, 28, 37} and m over 1..1 (runs once), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26617 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel with m = 1: sweeps n over {8, 12}
// (multiples of nr = 4) and k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26633 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel with m = 1 and cn_stride set to 7:
// sweeps n over {8, 12} and k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26650 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: sweeps n over {8, 12}, k over
// {1, 10, 19, 28, 37} and m over 1..1 (runs once), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26669 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile with ks set to 3,
// sweeping k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26684 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel with ks set to 3: sweeps k over
// {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 (runs once), one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26704 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel with m = 1 and ks set to 3:
// sweeps n over 5..7 and k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26721 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel with m = 1 and ks set to 3:
// sweeps n over {8, 12} and k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26738 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel with cm_stride set to 7: sweeps k
// over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..1 (runs once), one
// iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
26758 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile with ks set to 3
// and a_offset set to 43, sweeping k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26774 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile with ks set to 3
// and a_offset set to 43; sweeps k over {1, 10, 19, 28, 37} and zero_index
// over 0..0 (mr = 1, so the mz loop runs once).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26793 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: single full 1x4 tile at k = 8
// with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26806 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: single full 1x4 tile at k = 8
// with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26819 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: single full 1x4 tile at k = 8
// with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26832 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile with a_zero_point
// set to 0, sweeping k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26847 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile with b_zero_point
// set to 0, sweeping k over {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26862 
// 1x4c2 WAsm SIMD (dot16x2, ld128) kernel: full 1x4 tile with both
// a_zero_point and b_zero_point set to 0, sweeping k over
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26878 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26879 
26880 
26881 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x4c2s4 WAsm SIMD (dot16x2, ld128) kernel (sr = 4): single full 1x4 tile
// at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26893 
// 1x4c2s4 WAsm SIMD (dot16x2, ld128) kernel (sr = 4): single full 1x4 tile
// at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
26906 
// 1x4c2s4 WAsm SIMD (dot16x2, ld128) kernel (sr = 4) at k = 8: sweeps n over
// 1..4 and m over 1..1 (mr = 1, so the m loop runs once), one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
26923 
// 1x4c2s4 WAsm SIMD (dot16x2, ld128) kernel (sr = 4) at k = 8 and n = 4:
// sweeps m over 1..1 (mr = 1, so the loop runs once), one iteration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26938 
// 1x4c2s4 WAsm SIMD (dot16x2, ld128) kernel (sr = 4) at k = 8 and m = 1:
// sweeps n over 1..4, one iteration per value.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
26953 
// k_lt_8: runs the ld128 1x4c2s4 kernel with m = 1, n = 4 for each k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_value)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
26967 
// k_lt_8_subtile: for each k in 1..7, runs the ld128 1x4c2s4 kernel over
// n in 1..4 and m in 1..1, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
26986 
// k_gt_8: runs the ld128 1x4c2s4 kernel with m = 1, n = 4 for each k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_value)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27000 
// k_gt_8_subtile: for each k in 9..15, runs the ld128 1x4c2s4 kernel over
// n in 1..4 and m in 1..1, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27019 
// k_div_8: runs the ld128 1x4c2s4 kernel with m = 1, n = 4 for
// k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_value = 16; k_value < 81; k_value += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_value)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27033 
// k_div_8_subtile: for k = 16, 24, ..., 80 (step 8), runs the ld128 1x4c2s4
// kernel over n in 1..4 and m in 1..1, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_value = 16; k_value < 81; k_value += 8) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27052 
// n_gt_4: for n in 5..7 and k = 1, 10, 19, 28, 37, runs the ld128 1x4c2s4
// kernel with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_value).k(k_value)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27068 
// n_gt_4_strided_cn: for n in 5..7 and k = 1, 10, 19, 28, 37, runs the ld128
// 1x4c2s4 kernel with m = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_value).k(k_value)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27085 
// n_gt_4_subtile: for n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..1, runs the
// ld128 1x4c2s4 kernel with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27104 
// n_div_4: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the ld128 1x4c2s4
// kernel with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_value = 8; n_value < 13; n_value += 4) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_value).k(k_value)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27120 
// n_div_4_strided_cn: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the ld128
// 1x4c2s4 kernel with m = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_value = 8; n_value < 13; n_value += 4) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_value).k(k_value)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27137 
// n_div_4_subtile: for n = 8, 12, k = 1, 10, 19, 28, 37 and m in 1..1, runs
// the ld128 1x4c2s4 kernel with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_value = 8; n_value < 13; n_value += 4) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27156 
// small_kernel: for k = 1, 10, 19, 28, 37, runs the ld128 1x4c2s4 kernel with
// m = 1, n = 4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_value)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27171 
// small_kernel_subtile: for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// runs the ld128 1x4c2s4 kernel with ks set to 3 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_value).n(n_value).k(k_value)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27191 
// n_gt_4_small_kernel: for n in 5..7 and k = 1, 10, 19, 28, 37, runs the ld128
// 1x4c2s4 kernel with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_value).k(k_value)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27208 
// n_div_4_small_kernel: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the
// ld128 1x4c2s4 kernel with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_value = 8; n_value < 13; n_value += 4) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_value).k(k_value)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27225 
// strided_cm_subtile: for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// runs the ld128 1x4c2s4 kernel with cm_stride set to 7 and iterations set
// to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_value).n(n_value).k(k_value)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27245 
// a_offset: for k = 1, 10, 19, 28, 37, runs the ld128 1x4c2s4 kernel with
// m = 1, n = 4, ks set to 3 and a_offset set to 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_value)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27261 
// zero: for k = 1, 10, 19, 28, 37 and zero_index in 0..0, runs the ld128
// 1x4c2s4 kernel with m = 1, n = 4, ks set to 3 and a_offset set to 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    for (uint32_t zero_idx = 0; zero_idx != 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_value)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27280 
// qmin: single run of the ld128 1x4c2s4 kernel with m = 1, n = 4, k = 8 and
// qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27293 
// qmax: single run of the ld128 1x4c2s4 kernel with m = 1, n = 4, k = 8 and
// qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27306 
// strided_cm: single run of the ld128 1x4c2s4 kernel with m = 1, n = 4, k = 8
// and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27319 
// no_a_zero_point: for k = 1, 10, 19, 28, 37, runs the ld128 1x4c2s4 kernel
// with m = 1, n = 4 and a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_value)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27334 
// no_b_zero_point: for k = 1, 10, 19, 28, 37, runs the ld128 1x4c2s4 kernel
// with m = 1, n = 4 and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_value)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27349 
// no_zero_point: for k = 1, 10, 19, 28, 37, runs the ld128 1x4c2s4 kernel with
// m = 1, n = 4 and both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_value)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27365 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27366 
27367 
27368 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// k_eq_8: single run of the ld64 1x4c8 kernel with m = 1, n = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27380 
// strided_cn: single run of the ld64 1x4c8 kernel with m = 1, n = 4, k = 8 and
// cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27393 
// k_eq_8_subtile: with k fixed at 8, runs the ld64 1x4c8 kernel for every
// (n, m) pair with n in 1..4 and m in 1..1, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_value = 1; n_value < 5; ++n_value) {
    for (uint32_t m_value = 1; m_value < 2; ++m_value) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_value).n(n_value).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27410 
// k_eq_8_subtile_m: with n = 4 and k = 8, sweeps m over 1..1 and runs the
// ld64 1x4c8 kernel with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_value = 1; m_value < 2; ++m_value) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_value).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27425 
// k_eq_8_subtile_n: with m = 1 and k = 8, sweeps n over 1..4 and runs the
// ld64 1x4c8 kernel with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_value = 1; n_value < 5; ++n_value) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_value).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27440 
// k_lt_8: runs the ld64 1x4c8 kernel with m = 1, n = 4 for each k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_value)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27454 
// k_lt_8_subtile: for each k in 1..7, runs the ld64 1x4c8 kernel over
// n in 1..4 and m in 1..1, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_value = 1; k_value <= 7; ++k_value) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27473 
// k_gt_8: runs the ld64 1x4c8 kernel with m = 1, n = 4 for each k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_value)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27487 
// k_gt_8_subtile: for each k in 9..15, runs the ld64 1x4c8 kernel over
// n in 1..4 and m in 1..1, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_value = 9; k_value <= 15; ++k_value) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27506 
// k_div_8: runs the ld64 1x4c8 kernel with m = 1, n = 4 for
// k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_value = 16; k_value < 81; k_value += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_value)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27520 
// k_div_8_subtile: for k = 16, 24, ..., 80 (step 8), runs the ld64 1x4c8
// kernel over n in 1..4 and m in 1..1, iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_value = 16; k_value < 81; k_value += 8) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27539 
// n_gt_4: for n in 5..7 and k = 1, 10, 19, 28, 37, runs the ld64 1x4c8 kernel
// with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_value).k(k_value)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27555 
// n_gt_4_strided_cn: for n in 5..7 and k = 1, 10, 19, 28, 37, runs the ld64
// 1x4c8 kernel with m = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_value).k(k_value)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27572 
// n_gt_4_subtile: for n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..1, runs the
// ld64 1x4c8 kernel with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27591 
// n_div_4: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the ld64 1x4c8 kernel
// with m = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_value = 8; n_value < 13; n_value += 4) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_value).k(k_value)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27607 
// n_div_4_strided_cn: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the ld64
// 1x4c8 kernel with m = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_value = 8; n_value < 13; n_value += 4) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_value).k(k_value)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27624 
// n_div_4_subtile: for n = 8, 12, k = 1, 10, 19, 28, 37 and m in 1..1, runs
// the ld64 1x4c8 kernel with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_value = 8; n_value < 13; n_value += 4) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27643 
// small_kernel: for k = 1, 10, 19, 28, 37, runs the ld64 1x4c8 kernel with
// m = 1, n = 4 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_value)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27658 
// small_kernel_subtile: for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// runs the ld64 1x4c8 kernel with ks set to 3 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27678 
// n_gt_4_small_kernel: for n in 5..7 and k = 1, 10, 19, 28, 37, runs the ld64
// 1x4c8 kernel with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_value = 5; n_value <= 7; ++n_value) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_value).k(k_value)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27695 
// n_div_4_small_kernel: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the ld64
// 1x4c8 kernel with m = 1 and ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_value = 8; n_value < 13; n_value += 4) {
    for (size_t k_value = 1; k_value < 41; k_value += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_value).k(k_value)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27712 
// strided_cm_subtile: for k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// runs the ld64 1x4c8 kernel with cm_stride set to 7 and iterations set to 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    for (uint32_t n_value = 1; n_value < 5; ++n_value) {
      for (uint32_t m_value = 1; m_value < 2; ++m_value) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_value).n(n_value).k(k_value)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27732 
// a_offset: for k = 1, 10, 19, 28, 37, runs the ld64 1x4c8 kernel with m = 1,
// n = 4, ks set to 3 and a_offset set to 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_value = 1; k_value < 41; k_value += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_value)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27748 
// Full 1x4 tile for k in {1, 10, 19, 28, 37}, with ks = 3, a_offset = 43 and
// zero_index set to each mz in [0, 1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(k).ks(3).a_offset(43).zero_index(mz);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27767 
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27780 
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27793 
// Full 1x4 tile at k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27806 
// Full 1x4 tile for k in {1, 10, 19, 28, 37} with a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27821 
// Full 1x4 tile for k in {1, 10, 19, 28, 37} with b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27836 
// Full 1x4 tile for k in {1, 10, 19, 28, 37} with both a_zero_point and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27852 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27853 
27854 
27855 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 1x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27867 
// Full 1x4 tile at k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
27880 
// Every (m, n) with m <= 1 and n <= 4 at k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
27897 
// m swept over [1, 1] with n = 4 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27912 
// n swept over [1, 4] with m = 1 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(n).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27927 
// Full 1x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27941 
// Every (m, n) with m <= 1 and n <= 4 for every k in [1, 7]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27960 
// Full 1x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
27974 
// Every (m, n) with m <= 1 and n <= 4 for every k in [9, 15]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
27993 
// Full 1x4 tile for k in {16, 24, ..., 80} (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28007 
// Every (m, n) with m <= 1 and n <= 4 for k in {16, 24, ..., 80}; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28026 
// n in {5, 6, 7} (above nr = 4) crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28042 
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}, with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28059 
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37} and m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28078 
// n in {8, 12} (multiples of nr = 4) crossed with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28094 
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28111 
// n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 1]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28130 
// Full 1x4 tile for k in {1, 10, 19, 28, 37} with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28145 
// Every (m, n) with m <= 1 and n <= 4 for k in {1, 10, 19, 28, 37}, with ks = 3;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28165 
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28182 
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, with ks = 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(n).k(k).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28199 
// Every (m, n) with m <= 1 and n <= 4, for k in {1, 10, 19, 28, 37}, using cm_stride = 7
// and a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28219 
// Full 1x4 tile for k in {1, 10, 19, 28, 37}, with ks = 3 and a_offset = 43.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).ks(3).a_offset(43);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28235 
// Full 1x4 tile for k in {1, 10, 19, 28, 37}, with ks = 3, a_offset = 43 and
// zero_index set to each mz in [0, 1).
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(k).ks(3).a_offset(43).zero_index(mz);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28254 
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28267 
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28280 
// Full 1x4 tile at k = 8 with cm_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28293 
// Full 1x4 tile for k in {1, 10, 19, 28, 37} with a_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28308 
// Full 1x4 tile for k in {1, 10, 19, 28, 37} with b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28323 
// Full 1x4 tile for k in {1, 10, 19, 28, 37} with both a_zero_point and b_zero_point = 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(k).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28339 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28340 
28341 
28342 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 2x4 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28354 
// Full 2x4 tile at k = 8 with cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28367 
// Every (m, n) with m <= 2 and n <= 4 at k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28384 
// m swept over [1, 2] with n = 4 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28399 
// n swept over [1, 4] with m = 2 and k = 8; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28414 
// Full 2x4 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28428 
// Every (m, n) with m <= 2 and n <= 4 for every k in [1, 7]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28447 
// Full 2x4 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28461 
// Every (m, n) with m <= 2 and n <= 4 for every k in [9, 15]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28480 
// Full 2x4 tile for k in {16, 24, ..., 80} (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28494 
// Every (m, n) with m <= 2 and n <= 4 for k in {16, 24, ..., 80}; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28513 
// n in {5, 6, 7} (above nr = 4) crossed with k in {1, 10, 19, 28, 37}, with m = 2.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28529 
// n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}, with m = 2 and cn_stride = 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28546 
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37} and m in [1, 2]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28565 
// Runs the ld64 ukernel with n = 8 and 12 (multiples of nr = 4) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28581 
// Same n = 8, 12 / k = 1, 10, 19, 28, 37 sweep as n_div_4, with cn_stride
// set to 7 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28598 
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..2, calling the tester
// with iterations(1) for every combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28617 
// Full 2x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28632 
// ks = 3 with every m in 1..2 and n in 1..4, for k = 1, 10, 19, 28, 37;
// each configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28652 
// ks = 3 combined with n = 5..7 (greater than nr = 4) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28669 
// ks = 3 combined with n = 8, 12 (multiples of nr = 4) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28686 
// cm_stride = 7 with every m in 1..2 and n in 1..4, for
// k = 1, 10, 19, 28, 37; each configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28706 
// Full 2x4 tile with ks = 3 and a_offset = 83, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28722 
// Full 2x4 tile with ks = 3 and a_offset = 83, repeated with zero_index set
// to 0 and then 1 for each k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28741 
// Single 2x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28754 
// Single 2x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28767 
// Single 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28780 
// Full 2x4 tile with a_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28795 
// Full 2x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28810 
// Full 2x4 tile with both a_zero_point and b_zero_point forced to 0, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28826 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28827 
28828 
28829 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single full 2x4 tile at k = 8 for the 2x4c2 wasmsimd dot16x2 ld128 ukernel.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28841 
// Single full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
28854 
// k fixed at 8; every n in 1..4 paired with every m in 1..2, each with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
28871 
// k fixed at 8 and n at 4; m takes 1 and 2, each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28886 
// k fixed at 8 and m at 2; n takes 1..4, each with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28901 
// Full 2x4 tile for every k in 1..7 (all values below 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28915 
// Every k in 1..7 combined with every n in 1..4 and m in 1..2, each with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28934 
// Full 2x4 tile for every k in 9..15 (strictly between 8 and 16).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28948 
// Every k in 9..15 combined with every n in 1..4 and m in 1..2, each with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
28967 
// Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
28981 
// k = 16, 24, ..., 80 combined with every n in 1..4 and m in 1..2, each
// with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29000 
// Runs GemmMicrokernelTester on the 2x4c2 wasmsimd dot16x2 ld128 ukernel
// with n = 5, 6, 7 (each greater than nr = 4) and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29016 
// Same n = 5..7 / k = 1, 10, 19, 28, 37 sweep as n_gt_4, with cn_stride set
// to 7 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29033 
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..2, calling the tester
// with iterations(1) for every combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29052 
// Runs the ld128 ukernel with n = 8 and 12 (multiples of nr = 4) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29068 
// Same n = 8, 12 / k = 1, 10, 19, 28, 37 sweep as n_div_4, with cn_stride
// set to 7 on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29085 
// Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..2, calling the tester
// with iterations(1) for every combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29104 
// Full 2x4 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29119 
// ks = 3 with every m in 1..2 and n in 1..4, for k = 1, 10, 19, 28, 37;
// each configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29139 
// ks = 3 combined with n = 5..7 (greater than nr = 4) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29156 
// ks = 3 combined with n = 8, 12 (multiples of nr = 4) and
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29173 
// cm_stride = 7 with every m in 1..2 and n in 1..4, for
// k = 1, 10, 19, 28, 37; each configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
29193 
// Full 2x4 tile with ks = 3 and a_offset = 83, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29209 
// Full 2x4 tile with ks = 3 and a_offset = 83, repeated with zero_index set
// to 0 and then 1 for each k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29228 
// Single 2x4 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29241 
// Single 2x4 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29254 
// Single 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29267 
// Full 2x4 tile with a_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29282 
// Full 2x4 tile with b_zero_point forced to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29297 
// Full 2x4 tile with both a_zero_point and b_zero_point forced to 0, for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29313 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29314 
29315 
29316 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single full 2x4 tile at k = 8 for the 2x4c8 wasmsimd dot16x2 ld128 ukernel
// (kr = 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29328 
// Single full 2x4 tile at k = 8 with cn_stride set to 7 (kr = 8 ukernel).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
29341 
// k fixed at 8; every n in 1..4 paired with every m in 1..2, each with
// iterations(1) (kr = 8 ukernel).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
29358 
// k fixed at 8 and n at 4; m takes 1 and 2, each with iterations(1)
// (kr = 8 ukernel).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
29373 
// With m=2 and k=8 fixed, tests n = 1..4, using iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29388 
// Tests the m=2, n=4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29402 
// For every k in 1..7, tests every (m, n) combination for m in 1..2 and
// n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29421 
// Tests the m=2, n=4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29435 
// For every k in 9..15, tests every (m, n) combination for m in 1..2 and
// n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29454 
// Tests the m=2, n=4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29468 
// For k = 16, 24, ..., 80, tests every (m, n) combination for m in 1..2 and
// n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29487 
// With m=2, tests n in 5..7 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29503 
// With m=2 and cn_stride(7), tests n in 5..7 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29520 
// Tests n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..2, using iterations(1)
// for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29539 
// With m=2, tests n = 8 and n = 12 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29555 
// With m=2 and cn_stride(7), tests n = 8 and n = 12 against
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29572 
// Tests n = 8 and n = 12, k = 1, 10, 19, 28, 37 and m in 1..2, using
// iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29591 
// Tests the m=2, n=4 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29606 
// With ks(3), tests k = 1, 10, 19, 28, 37 against every (m, n) combination
// for m in 1..2 and n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29626 
// With m=2 and ks(3), tests n in 5..7 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29643 
// With m=2 and ks(3), tests n = 8 and n = 12 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29660 
// With cm_stride(7), tests k = 1, 10, 19, 28, 37 against every (m, n)
// combination for m in 1..2 and n in 1..4, using iterations(1) for each.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29680 
// Tests the m=2, n=4 tile with ks(3) and a_offset(83) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29696 
// With ks(3) and a_offset(83), tests k = 1, 10, 19, 28, 37 against
// zero_index(mz) for mz = 0 and mz = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29715 
// Tests the m=2, n=4, k=8 configuration with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29728 
// Tests the m=2, n=4, k=8 configuration with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29741 
// Tests the m=2, n=4, k=8 configuration with cm_stride(7) set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29754 
// Tests the m=2, n=4 tile with a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29769 
// Tests the m=2, n=4 tile with b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29784 
// Tests the m=2, n=4 tile with both a_zero_point(0) and b_zero_point(0) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29800 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29801 
29802 
29803 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 3x4c2 wasmsimd dot16x2 ld128 QU8 IGEMM microkernel once on an
// m=3, n=4 tile with k=8.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29815 
// Same m=3, n=4, k=8 configuration as the k_eq_8 case, but with cn_stride(7)
// set on the tester.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
29828 
// With k fixed at 8, tests every (m, n) combination for m in 1..3 and
// n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29845 
// With n=4 and k=8 fixed, tests m = 1..3, using iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29860 
// With m=3 and k=8 fixed, tests n = 1..4, using iterations(1) each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29875 
// Tests the m=3, n=4 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29889 
// For every k in 1..7, tests every (m, n) combination for m in 1..3 and
// n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29908 
// Tests the m=3, n=4 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29922 
// For every k in 9..15, tests every (m, n) combination for m in 1..3 and
// n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29941 
// Tests the m=3, n=4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
29955 
// For k = 16, 24, ..., 80, tests every (m, n) combination for m in 1..3 and
// n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
29974 
// With m=3, tests n in 5..7 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
29990 
// With m=3 and cn_stride(7), tests n in 5..7 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
30007 
// Tests n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..3, using iterations(1)
// for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
30026 
// With m=3, tests n = 8 and n = 12 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
30042 
// With m=3 and cn_stride(7), tests n = 8 and n = 12 against
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
30059 
// Tests n = 8 and n = 12, k = 1, 10, 19, 28, 37 and m in 1..3, using
// iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
30078 
// Tests the m=3, n=4 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
30093 
// With ks(3), tests k = 1, 10, 19, 28, 37 against every (m, n) combination
// for m in 1..3 and n in 1..4, using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
30113 
// With m=3 and ks(3), tests n in 5..7 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
30130 
// With m=3 and ks(3), tests n = 8 and n = 12 against k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
30147 
// With cm_stride(7), tests k = 1, 10, 19, 28, 37 against every (m, n)
// combination for m in 1..3 and n in 1..4, using iterations(1) for each.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
30167 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; ks = 3 and a_offset = 127.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30183 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; ks = 3, a_offset = 127, and
  // zero_index swept over 0..2.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30202 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single m = 3, n = 4, k = 8 case with qmin = 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30215 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single m = 3, n = 4, k = 8 case with qmax = 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30228 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single m = 3, n = 4, k = 8 case with cm_stride = 7.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30241 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; a_zero_point forced to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30256 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; b_zero_point forced to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30271 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; both a_zero_point and
  // b_zero_point forced to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30287 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30288 
30289 
30290 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single case: m = 3, n = 4, k = 8.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30302 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single m = 3, n = 4, k = 8 case with cn_stride = 7.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30315 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k = 8 with every n in 1..4 and m in 1..3; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30332 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k = 8, n = 4, with m swept over 1..3; one iteration each.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30347 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k = 8, m = 3, with n swept over 1..4; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30362 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // m = 3, n = 4, with k swept over 1..7.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30376 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k in 1..7 crossed with every n in 1..4 and m in 1..3; one iteration each.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30395 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // m = 3, n = 4, with k swept over 9..15.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30409 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k in 9..15 crossed with every n in 1..4 and m in 1..3; one iteration each.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30428 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // m = 3, n = 4, with k = 16, 24, ..., 80 (step 8).
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30442 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with every n in 1..4 and m in 1..3;
  // one iteration each.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30461 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // m = 3; n = 5..7 (above nr = 4); k = 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30477 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // m = 3; n = 5..7; k = 1, 10, 19, 28, 37; cn_stride = 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30494 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n = 5..7; k = 1, 10, 19, 28, 37; m swept over 1..3; one iteration each.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30513 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // m = 3; n = 8, 12 (multiples of nr = 4); k = 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30529 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // m = 3; n = 8, 12; k = 1, 10, 19, 28, 37; cn_stride = 7.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30546 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n = 8, 12; k = 1, 10, 19, 28, 37; m swept over 1..3; one iteration each.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30565 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; ks = 3.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30580 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // k = 1, 10, 19, 28, 37 crossed with every n in 1..4 and m in 1..3,
  // with ks = 3 and one iteration each.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30600 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // m = 3; n = 5..7 (above nr = 4); k = 1, 10, 19, 28, 37; ks = 3.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30617 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // m = 3; n = 8, 12 (multiples of nr = 4); k = 1, 10, 19, 28, 37; ks = 3.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30634 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // Every m in 1..3 and n in 1..4 for k = 1, 10, 19, 28, 37, with
  // cm_stride = 7 and a single iteration per configuration.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30654 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; ks = 3 and a_offset = 127.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30670 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; ks = 3, a_offset = 127, and
  // zero_index swept over 0..2.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30689 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single m = 3, n = 4, k = 8 case with qmin = 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30702 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single m = 3, n = 4, k = 8 case with qmax = 128.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30715 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single m = 3, n = 4, k = 8 case with cm_stride = 7.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30728 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; a_zero_point forced to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30743 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; b_zero_point forced to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30758 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  // m = 3, n = 4; k = 1, 10, 19, 28, 37; both a_zero_point and
  // b_zero_point forced to 0.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30774 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30775 
30776 
30777 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  // Single case: m = 3, n = 4, k = 8.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30789 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  // Single m = 3, n = 4, k = 8 case with cn_stride = 7.
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
}
30802 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // k = 8 with every n in 1..4 and m in 1..3; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30819 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // k = 8, n = 4, with m swept over 1..3; one iteration each.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30834 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // k = 8, m = 3, with n swept over 1..4; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30849 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // m = 3, n = 4, with k swept over 1..7.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30863 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // k in 1..7 crossed with every n in 1..4 and m in 1..3; one iteration each.
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30882 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // m = 3, n = 4, with k swept over 9..15.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30896 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // k in 9..15 crossed with every n in 1..4 and m in 1..3; one iteration each.
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30915 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // m = 3, n = 4, with k = 16, 24, ..., 80 (step 8).
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
30929 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with every n in 1..4 and m in 1..3;
  // one iteration each.
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qu8_requantize_fp32);
      }
    }
  }
}
30948 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // m = 3; n = 5..7 (above nr = 4); k = 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30964 
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // m = 3; n = 5..7; k = 1, 10, 19, 28, 37; cn_stride = 7.
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
30981 
// n in [5, 7] (above nr=4), k in {1, 10, 19, 28, 37} and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31000 
// n in {8, 12} (multiples of nr=4) with m=3 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31016 
// n in {8, 12} (multiples of nr=4) with m=3, k in {1, 10, 19, 28, 37} and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31033 
// n in {8, 12} (multiples of nr=4), k in {1, 10, 19, 28, 37} and every m in
// [1, 3]; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31052 
// Full tile (m=3, n=4) with ks(3) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31067 
// ks(3) with k in {1, 10, 19, 28, 37}, for every m in [1, 3] and n in [1, 4];
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31087 
// n in [5, 7] (above nr=4) with m=3, ks(3) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31104 
// n in {8, 12} (multiples of nr=4) with m=3, ks(3) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31121 
// cm_stride(7) with k in {1, 10, 19, 28, 37}, for every m in [1, 3] and
// n in [1, 4]; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31141 
// Full tile (m=3, n=4) with ks(3), a_offset(127) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31157 
// Full tile (m=3, n=4) with ks(3) and a_offset(127); zero_index is swept over
// [0, 2] for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31176 
// Full tile (m=3, n=4) at k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31189 
// Full tile (m=3, n=4) at k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31202 
// Full tile (m=3, n=4) at k=8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31215 
// Full tile (m=3, n=4) with a_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31230 
// Full tile (m=3, n=4) with b_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31245 
// Full tile (m=3, n=4) with both a_zero_point(0) and b_zero_point(0), and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31261 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31262 
31263 
31264 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full tile (m=3, n=4) at k=8, which equals kr.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31276 
// Full tile (m=3, n=4) at k=8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31289 
// k=8 for every m in [1, 3] and n in [1, 4]; each configuration is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31306 
// k=8 and n=4 for every m in [1, 3]; each configuration is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31321 
// k=8 and m=3 for every n in [1, 4]; each configuration is run with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31336 
// Full tile (m=3, n=4) with every k in [1, 7] (below kr=8).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31350 
// Every k in [1, 7] (below kr=8) for every m in [1, 3] and n in [1, 4]; each
// configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31369 
// Full tile (m=3, n=4) with every k in [9, 15] (strictly between kr=8 and 16).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31383 
// Every k in [9, 15] for every m in [1, 3] and n in [1, 4]; each configuration
// is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31402 
// Full tile (m=3, n=4) with k swept over the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31416 
// k swept over the multiples of 8 from 16 to 80, for every m in [1, 3] and
// n in [1, 4]; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31435 
// n in [5, 7] (above nr=4) with m=3 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31451 
// n in [5, 7] (above nr=4) with m=3, k in {1, 10, 19, 28, 37} and cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31468 
// n in [5, 7] (above nr=4), k in {1, 10, 19, 28, 37} and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31487 
// n in {8, 12} (multiples of nr=4) with m=3 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31503 
// n in {8, 12} (multiples of nr=4) with m=3, k in {1, 10, 19, 28, 37} and
// cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31520 
// n in {8, 12} (multiples of nr=4), k in {1, 10, 19, 28, 37} and every m in
// [1, 3]; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31539 
// Full tile (m=3, n=4) with ks(3) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31554 
// ks(3) with k in {1, 10, 19, 28, 37}, for every m in [1, 3] and n in [1, 4];
// each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31574 
// n in [5, 7] (above nr=4) with m=3, ks(3) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31591 
// n in {8, 12} (multiples of nr=4) with m=3, ks(3) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31608 
// cm_stride(7) with k in {1, 10, 19, 28, 37}, for every m in [1, 3] and
// n in [1, 4]; each configuration is run with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
31628 
// Full tile (m=3, n=4) with ks(3), a_offset(127) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31644 
// Full tile (m=3, n=4) with ks(3) and a_offset(127); zero_index is swept over
// [0, 2] for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
    }
  }
}
31663 
// Full tile (m=3, n=4) at k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31676 
// Full tile (m=3, n=4) at k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31689 
// Full tile (m=3, n=4) at k=8 with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31702 
// Full tile (m=3, n=4) with a_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31717 
// Full tile (m=3, n=4) with b_zero_point(0) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31732 
// Full tile (m=3, n=4) with both a_zero_point(0) and b_zero_point(0), and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
31748 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31749 
31750 
31751 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full tile (m=4, n=4) at k=8 for the 4x4c2 ukernel (mr=4, nr=4, kr=2).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31763 
// Full tile (m=4, n=4) at k=8 with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
}
31776 
// 4x4c2 wasmsimd_dot16x2_ld64: k=8, every (n, m) pair with n in [1, 4]
// and m in [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31793 
// 4x4c2 wasmsimd_dot16x2_ld64: n=4, k=8, m swept over [1, 4];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31808 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, k=8, n swept over [1, 4];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31823 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k swept over [1, 7].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31837 
// 4x4c2 wasmsimd_dot16x2_ld64: k in [1, 7] crossed with n in [1, 4] and
// m in [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31856 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k swept over [9, 15].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31870 
// 4x4c2 wasmsimd_dot16x2_ld64: k in [9, 15] crossed with n in [1, 4] and
// m in [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31889 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
31903 
// 4x4c2 wasmsimd_dot16x2_ld64: k = 16, 24, ..., 80 crossed with n in [1, 4]
// and m in [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31922 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n in [5, 7], k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31938 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n in [5, 7], k in {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31955 
// 4x4c2 wasmsimd_dot16x2_ld64: n in [5, 7], k in {1, 10, 19, 28, 37},
// m in [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
31974 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
31990 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n in {8, 12}, k in {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32007 
// 4x4c2 wasmsimd_dot16x2_ld64: n in {8, 12}, k in {1, 10, 19, 28, 37},
// m in [1, 4]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32026 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k in {1, 10, 19, 28, 37},
// with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32041 
// 4x4c2 wasmsimd_dot16x2_ld64: k in {1, 10, 19, 28, 37}, n in [1, 4],
// m in [1, 4], with ks set to 3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32061 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n in [5, 7], k in {1, 10, 19, 28, 37},
// with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32078 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n in {8, 12}, k in {1, 10, 19, 28, 37},
// with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32095 
// 4x4c2 wasmsimd_dot16x2_ld64: k in {1, 10, 19, 28, 37}, n in [1, 4],
// m in [1, 4], with cm_stride set to 7; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32115 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k in {1, 10, 19, 28, 37},
// with ks set to 3 and a_offset set to 163.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32131 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k in {1, 10, 19, 28, 37},
// ks=3, a_offset=163, with zero_index swept over [0, 3].
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32150 
// 4x4c2 wasmsimd_dot16x2_ld64: single run with m=4, n=4, k=8 and
// qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32163 
// 4x4c2 wasmsimd_dot16x2_ld64: single run with m=4, n=4, k=8 and
// qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32176 
// 4x4c2 wasmsimd_dot16x2_ld64: single run with m=4, n=4, k=8 and
// cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
}
32189 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k in {1, 10, 19, 28, 37},
// with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .a_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32204 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k in {1, 10, 19, 28, 37},
// with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32219 
// 4x4c2 wasmsimd_dot16x2_ld64: m=4, n=4, k in {1, 10, 19, 28, 37},
// with both a_zero_point and b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
            xnn_qu8_requantize_fp32);
  }
}
32235 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32236 
32237 
32238 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 3x2 wasm_fmagic: single run with m=3, n=2, k=1.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32250 
// 3x2 wasm_fmagic: single run with m=3, n=2, k=1 and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32263 
// 3x2 wasm_fmagic: k=1, every (n, m) pair with n in [1, 2] and m in [1, 3];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32280 
// 3x2 wasm_fmagic: n=2, k=1, m swept over [1, 3];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32295 
// 3x2 wasm_fmagic: m=3, k=1, n swept over [1, 2];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32310 
// 3x2 wasm_fmagic: m=3, n=2, k swept over [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32324 
// 3x2 wasm_fmagic: k in [2, 9] crossed with n in [1, 2] and m in [1, 3];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32343 
// 3x2 wasm_fmagic: m=3, n=3 (the only value in [3, 4)), k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32359 
// 3x2 wasm_fmagic: m=3, n=3 (the only value in [3, 4)), k in {1, 3, 5},
// with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32376 
// 3x2 wasm_fmagic: n=3 (the only value in [3, 4)), k in {1, 3, 5},
// m in [1, 3]; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32395 
// 3x2 wasm_fmagic: m=3, n in {4, 6}, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32411 
// 3x2 wasm_fmagic: m=3, n in {4, 6}, k in {1, 3, 5},
// with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32428 
// 3x2 wasm_fmagic: n in {4, 6}, k in {1, 3, 5}, m in [1, 3];
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32447 
// 3x2 wasm_fmagic: m=3, n=2, k in {1, 3, 5}, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .ks(3)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32462 
// 3x2 wasm_fmagic: k in {1, 3, 5}, n in [1, 2], m in [1, 3], with ks set
// to 3; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32482 
// 3x2 wasm_fmagic: m=3, n=3 (the only value in [3, 4)), k in {1, 3, 5},
// with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32499 
// 3x2 wasm_fmagic: m=3, n in {4, 6}, k in {1, 3, 5}, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32516 
// 3x2 wasm_fmagic: k in {1, 3, 5}, n in [1, 2], m in [1, 3], with cm_stride
// set to 5; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32536 
// 3x2 wasm_fmagic: m=3, n=2, k in {1, 3, 5}, with ks set to 3 and
// a_offset set to 17.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .ks(3)
        .a_offset(17)
        .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32552 
// 3x2 wasm_fmagic: m=3, n=2, k in {1, 3, 5}, ks=3, a_offset=17,
// with zero_index swept over [0, 2].
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(2).k(k_size)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(
              xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32571 
// 3x2 wasm_fmagic: single run with m=3, n=2, k=1 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32584 
// 3x2 wasm_fmagic: single run with m=3, n=2, k=1 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32597 
// 3x2 wasm_fmagic: single run with m=3, n=2, k=1 and cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32610 
// no_a_zero_point: full 3x2 tile with a_zero_point=0; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32625 
// no_b_zero_point: full 3x2 tile with b_zero_point=0; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32640 
// no_zero_point: full 3x2 tile with both a_zero_point and b_zero_point set
// to 0; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32656 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32657 
32658 
32659 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// k_eq_1: full 3x4 tile with k=1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32671 
// strided_cn: full 3x4 tile, k=1, with cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
32684 
// k_eq_1_subtile: k=1, one iteration per case; n runs 1..4 (outer) and
// m runs 1..3 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32701 
// k_eq_1_subtile_m: k=1, n=4, one iteration per case; m runs 1..3.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(mc).n(4).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32716 
// k_eq_1_subtile_n: k=1, m=3, one iteration per case; n runs 1..4.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(nc).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32731 
// k_gt_1: full 3x4 tile; k runs 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32745 
// k_gt_1_subtile: one iteration per case; k runs 2..9, n runs 1..4,
// m runs 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32764 
// n_gt_4: m=3; n runs 5..7 (outer) and k takes 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32780 
// n_gt_4_strided_cn: m=3, cn_stride=7; n runs 5..7 (outer) and k takes
// 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32797 
// n_gt_4_subtile: one iteration per case; n runs 5..7, k takes 1, 3, 5,
// m runs 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32816 
// n_div_4: m=3; n takes 8 and 12 (outer) and k takes 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32832 
// n_div_4_strided_cn: m=3, cn_stride=7; n takes 8 and 12 (outer) and k
// takes 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32849 
// n_div_4_subtile: one iteration per case; n takes 8 and 12, k takes
// 1, 3, 5, m runs 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32868 
// small_kernel: full 3x4 tile with ks=3; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32883 
// small_kernel_subtile: ks=3, one iteration per case; k takes 1, 3, 5,
// n runs 1..4, m runs 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32903 
// n_gt_4_small_kernel: m=3, ks=3; n runs 5..7 (outer) and k takes
// 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32920 
// n_div_4_small_kernel: m=3, ks=3; n takes 8 and 12 (outer) and k takes
// 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32937 
// strided_cm_subtile: cm_stride=7, one iteration per case; k takes
// 1, 3, 5, n runs 1..4, m runs 1..3 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
32957 
// a_offset: full 3x4 tile with ks=3 and a_offset=17; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
32973 
// zero: full 3x4 tile with ks=3 and a_offset=17; sweeps k over {1, 3, 5}
// and zero_index over 0..2.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(17)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
32992 
// qmin: full 3x4 tile, k=1, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
33005 
// qmax: full 3x4 tile, k=1, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
33018 
// strided_cm: full 3x4 tile, k=1, with cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
33031 
// no_a_zero_point: full 3x4 tile with a_zero_point=0; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33046 
// no_b_zero_point: full 3x4 tile with b_zero_point=0; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33061 
// no_zero_point: full 3x4 tile with both a_zero_point and b_zero_point set
// to 0; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33077 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33078 
33079 
// k_eq_1: full 1x2 tile with k=1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33091 
// strided_cn: full 1x2 tile, k=1, with cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33104 
// k_eq_1_subtile: k=1, one iteration per case; n runs 1..2 (outer) and
// m takes only the value 1 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33121 
// k_eq_1_subtile_m: k=1, n=2, one iteration per case; m takes only the
// value 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(mc).n(2).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33136 
// k_eq_1_subtile_n: k=1, m=1, one iteration per case; n runs 1..2.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(nc).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33151 
// k_gt_1: full 1x2 tile; k runs 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33165 
// k_gt_1_subtile: one iteration per case; k runs 2..9, n runs 1..2,
// m takes only the value 1 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33184 
// n_gt_2: m=1; n takes only the value 3 (outer) and k takes 1, 3, 5
// (inner).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33200 
// n_gt_2_strided_cn: m=1, cn_stride=5; n takes only the value 3 (outer)
// and k takes 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33217 
// n_gt_2_subtile: one iteration per case; n takes only the value 3,
// k takes 1, 3, 5, m takes only the value 1 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33236 
// n_div_2: m=1; n takes 4 and 6 (outer) and k takes 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33252 
// n_div_2_strided_cn: m=1, cn_stride=5; n takes 4 and 6 (outer) and k
// takes 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33269 
// n_div_2_subtile: one iteration per case; n takes 4 and 6, k takes
// 1, 3, 5, m takes only the value 1 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33288 
// small_kernel: full 1x2 tile with ks=3; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33303 
// small_kernel_subtile: ks=3, one iteration per case; k takes 1, 3, 5,
// n runs 1..2, m takes only the value 1 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33323 
// n_gt_2_small_kernel: m=1, ks=3; n takes only the value 3 (outer) and
// k takes 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33340 
// n_div_2_small_kernel: m=1, ks=3; n takes 4 and 6 (outer) and k takes
// 1, 3, 5 (inner).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33357 
// strided_cm_subtile: cm_stride=5, one iteration per case; k takes
// 1, 3, 5, n runs 1..2, m takes only the value 1 (nested in that order).
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
33377 
// a_offset: full 1x2 tile with ks=3 and a_offset=7; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33393 
// zero: full 1x2 tile with ks=3 and a_offset=7; k takes 1, 3, 5 and
// zero_index takes only the value 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(kc)
        .ks(3)
        .a_offset(7)
        .zero_index(zi)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
33412 
// qmin: full 1x2 tile, k=1, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33425 
// qmax: full 1x2 tile, k=1, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33438 
// strided_cm: full 1x2 tile, k=1, with cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
33451 
// no_a_zero_point: full 1x2 tile with a_zero_point=0; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33466 
// no_b_zero_point: full 1x2 tile with b_zero_point=0; k takes 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
33481 
// 1x2 scalar-imagic kernel, full tile, k in {1, 3, 5}, both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33497 
33498 
// 1x4 scalar-imagic kernel, full 1x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33510 
// 1x4 scalar-imagic kernel, full 1x4 tile with k = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33523 
// 1x4 scalar-imagic kernel, k = 1, every (n, m) with n in [1, 4] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33540 
// 1x4 scalar-imagic kernel, k = 1, n = 4, m swept over [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(mc).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33555 
// 1x4 scalar-imagic kernel, k = 1, m = 1, n swept over [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33570 
// 1x4 scalar-imagic kernel, full 1x4 tile, k swept over [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33584 
// 1x4 scalar-imagic kernel, k in [2, 9], n in [1, 4], m in [1, 1]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33603 
// 1x4 scalar-imagic kernel, m = 1, n in [5, 7], k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33619 
// 1x4 scalar-imagic kernel, m = 1, n in [5, 7], k in {1, 3, 5}, cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33636 
// 1x4 scalar-imagic kernel, n in [5, 7], k in {1, 3, 5}, m in [1, 1]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33655 
// 1x4 scalar-imagic kernel, m = 1, n in {8, 12}, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33671 
// 1x4 scalar-imagic kernel, m = 1, n in {8, 12}, k in {1, 3, 5}, cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33688 
// 1x4 scalar-imagic kernel, n in {8, 12}, k in {1, 3, 5}, m in [1, 1]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33707 
// 1x4 scalar-imagic kernel, full 1x4 tile, k in {1, 3, 5}, ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33722 
// 1x4 scalar-imagic kernel, ks = 3, k in {1, 3, 5}, n in [1, 4], m in [1, 1]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33742 
// 1x4 scalar-imagic kernel, m = 1, n in [5, 7], k in {1, 3, 5}, ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33759 
// 1x4 scalar-imagic kernel, m = 1, n in {8, 12}, k in {1, 3, 5}, ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33776 
// 1x4 scalar-imagic kernel, cm_stride = 7, k in {1, 3, 5}, n in [1, 4], m in [1, 1]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
33796 
// 1x4 scalar-imagic kernel, full 1x4 tile, k in {1, 3, 5}, ks = 3, a_offset set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(kc);
    tester.ks(3).a_offset(7);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33812 
// 1x4 scalar-imagic kernel, full tile, k in {1, 3, 5}, ks = 3, a_offset = 7, zero_index swept over [0, 0].
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(4).k(kc);
      tester.ks(3).a_offset(7).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33831 
// 1x4 scalar-imagic kernel, full 1x4 tile with k = 1 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33844 
// 1x4 scalar-imagic kernel, full 1x4 tile with k = 1 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33857 
// 1x4 scalar-imagic kernel, full 1x4 tile with k = 1 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33870 
// 1x4 scalar-imagic kernel, full tile, k in {1, 3, 5}, a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33885 
// 1x4 scalar-imagic kernel, full tile, k in {1, 3, 5}, b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33900 
// 1x4 scalar-imagic kernel, full tile, k in {1, 3, 5}, both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33916 
33917 
// 2x2 scalar-imagic kernel, full 2x2 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33929 
// 2x2 scalar-imagic kernel, full 2x2 tile with k = 1 and cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
33942 
// 2x2 scalar-imagic kernel, k = 1, every (n, m) with n in [1, 2] and m in [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
33959 
// 2x2 scalar-imagic kernel, k = 1, n = 2, m swept over [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(mc).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33974 
// 2x2 scalar-imagic kernel, k = 1, m = 2, n swept over [1, 2]; one iteration each.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(nc).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
33989 
// 2x2 scalar-imagic kernel, full 2x2 tile, k swept over [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34003 
// 2x2 scalar-imagic kernel, k in [2, 9], n in [1, 2], m in [1, 2]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34022 
// 2x2 scalar-imagic kernel, m = 2, n in [3, 3], k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34038 
// 2x2 scalar-imagic kernel, m = 2, n in [3, 3], k in {1, 3, 5}, cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34055 
// 2x2 scalar-imagic kernel, n in [3, 3], k in {1, 3, 5}, m in [1, 2]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34074 
// 2x2 scalar-imagic kernel, m = 2, n in {4, 6}, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34090 
// 2x2 scalar-imagic kernel, m = 2, n in {4, 6}, k in {1, 3, 5}, cn_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(5);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34107 
// 2x2 scalar-imagic kernel, n in {4, 6}, k in {1, 3, 5}, m in [1, 2]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34126 
// 2x2 scalar-imagic kernel, full 2x2 tile, k in {1, 3, 5}, ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34141 
// 2x2 scalar-imagic kernel, ks = 3, k in {1, 3, 5}, n in [1, 2], m in [1, 2]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34161 
// 2x2 scalar-imagic kernel, m = 2, n in [3, 3], k in {1, 3, 5}, ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34178 
// 2x2 scalar-imagic kernel, m = 2, n in {4, 6}, k in {1, 3, 5}, ks set to 3.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34195 
// 2x2 scalar-imagic kernel, cm_stride = 5, k in {1, 3, 5}, n in [1, 2], m in [1, 2]; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
34215 
// 2x2 scalar-imagic kernel, full 2x2 tile, k in {1, 3, 5}, ks = 3, a_offset set to 13.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.ks(3).a_offset(13);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34231 
// 2x2 scalar-imagic kernel, full tile, k in {1, 3, 5}, ks = 3, a_offset = 13, zero_index swept over [0, 1].
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(2).k(kc);
      tester.ks(3).a_offset(13).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
34250 
// 2x2 scalar-imagic kernel, full 2x2 tile with k = 1 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
34263 
// 2x2 scalar-imagic kernel, full 2x2 tile with k = 1 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
34276 
// 2x2 scalar-imagic kernel, full 2x2 tile with k = 1 and cm_stride set to 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
34289 
// 2x2 scalar-imagic kernel, full tile, k in {1, 3, 5}, a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34304 
// 2x2 scalar-imagic kernel, full tile, k in {1, 3, 5}, b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34319 
// 2x2 scalar-imagic kernel, full tile, k in {1, 3, 5}, both zero points set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
34335 
34336 
// 2x4 scalar-imagic kernel, full 2x4 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
34348 
// 2x4 scalar-imagic kernel, full 2x4 tile with k = 1 and cn_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
34361 
// Every (m, n) combination with 1 <= m <= 2 and 1 <= n <= 4 at k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34378 
// Varies m over 1..2 with n = 4 and k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(m_val).n(4).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34393 
// Varies n over 1..4 with m = 2 and k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34408 
// Full 2x4 tile, repeated for every k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34422 
// Every (m, n) combination with 1 <= m <= 2 and 1 <= n <= 4 for each k in 2..9, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34441 
// n in 5..7 (larger than nr = 4) with m = 2, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34457 
// n in 5..7 with m = 2 and cn_stride(7), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34474 
// n in 5..7, k = 1, 3, 5 and m in 1..2, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34493 
// n = 8 and 12 (multiples of nr = 4) with m = 2, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34509 
// n = 8 and 12 with m = 2 and cn_stride(7), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34526 
// n = 8 and 12, k = 1, 3, 5 and m in 1..2, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34545 
// Full 2x4 tile with ks(3), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34560 
// ks(3) with every (m, n) combination, 1 <= m <= 2 and 1 <= n <= 4, for k = 1, 3, 5; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34580 
// n in 5..7 with m = 2 and ks(3), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34597 
// n = 8 and 12 with m = 2 and ks(3), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34614 
// cm_stride(7) with every (m, n) combination, 1 <= m <= 2 and 1 <= n <= 4, for k = 1, 3, 5; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34634 
// Full 2x4 tile with ks(3) and a_offset(13), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34650 
// Full 2x4 tile with ks(3) and a_offset(13); zero_index takes 0 and 1 for each k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(13)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34669 
// Single run on a full 2x4 tile with k = 1 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
34682 
// Single run on a full 2x4 tile with k = 1 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
34695 
// Single run on a full 2x4 tile with k = 1 and cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
}
34708 
// Full 2x4 tile with a_zero_point(0), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34723 
// Full 2x4 tile with b_zero_point(0), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34738 
// Full 2x4 tile with both a_zero_point(0) and b_zero_point(0), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34754 
34755 
// Single run on a full 3x2 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
34767 
// Single run on a full 3x2 tile with k = 1 and cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
34780 
// Every (m, n) combination with 1 <= m <= 3 and 1 <= n <= 2 at k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34797 
// Varies m over 1..3 with n = 2 and k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(m_val).n(2).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34812 
// Varies n over 1..2 with m = 3 and k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34827 
// Full 3x2 tile, repeated for every k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34841 
// Every (m, n) combination with 1 <= m <= 3 and 1 <= n <= 2 for each k in 2..9, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34860 
// n = 3 (the only value in [3, 4), larger than nr = 2) with m = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34876 
// n = 3 with m = 3 and cn_stride(5), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34893 
// n = 3, k = 1, 3, 5 and m in 1..3, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34912 
// n = 4 and 6 (multiples of nr = 2) with m = 3, for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34928 
// n = 4 and 6 with m = 3 and cn_stride(5), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
34945 
// n = 4 and 6, k = 1, 3, 5 and m in 1..3, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34964 
// Full 3x2 tile with ks(3), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
34979 
// ks(3) with every (m, n) combination, 1 <= m <= 3 and 1 <= n <= 2, for k = 1, 3, 5; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
34999 
// n = 3 with m = 3 and ks(3), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35016 
// n = 4 and 6 with m = 3 and ks(3), for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35033 
// cm_stride(5) with every (m, n) combination, 1 <= m <= 3 and 1 <= n <= 2, for k = 1, 3, 5; iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
      }
    }
  }
}
35053 
// Full 3x2 tile with ks(3) and a_offset(17), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
35069 
// Full 3x2 tile with ks(3) and a_offset(17); zero_index takes 0, 1, 2 for each k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_val)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35088 
// Single run on a full 3x2 tile with k = 1 and qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
35101 
// Single run on a full 3x2 tile with k = 1 and qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
35114 
// Single run on a full 3x2 tile with k = 1 and cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
}
35127 
// Full 3x2 tile with a_zero_point(0), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
35142 
// Full 3x2 tile with b_zero_point(0), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
35157 
// Full 3x2 tile with both a_zero_point(0) and b_zero_point(0), repeated for k = 1, 3, 5.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
  }
}
35173 
35174 
// Single run on a full 3x2 tile with k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
35186 
// Single run on a full 3x2 tile with k = 1 and cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
}
35199 
// Every (m, n) combination with 1 <= m <= 3 and 1 <= n <= 2 at k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
35216 
// Varies m over 1..3 with n = 2 and k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(m_val).n(2).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
35231 
// Varies n over 1..2 with m = 3 and k = 1, using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
35246 
// Full 3x2 tile, repeated for every k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
  }
}
35260 
// 3x2 scalar lrintf kernel: tests k in [2, 9] crossed with every (m, n),
// m in [1, 3] and n in [1, 2], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35279 
// 3x2 scalar lrintf kernel, m = 3: tests n = 3 (the only value in [3, 4))
// with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
35295 
// 3x2 scalar lrintf kernel, m = 3, cn_stride(5): tests n = 3 with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(5);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
35312 
// 3x2 scalar lrintf kernel: tests n = 3, k in {1, 3, 5} and m in [1, 3],
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35331 
// 3x2 scalar lrintf kernel, m = 3: tests n in {4, 6} with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
35347 
// 3x2 scalar lrintf kernel, m = 3, cn_stride(5): tests n in {4, 6} with
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(5);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
35364 
// 3x2 scalar lrintf kernel: tests n in {4, 6}, k in {1, 3, 5} and m in [1, 3],
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35383 
// 3x2 scalar lrintf kernel, full 3x2 shape with ks(3): tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
35398 
// 3x2 scalar lrintf kernel with ks(3): tests k in {1, 3, 5}, n in [1, 2] and
// m in [1, 3], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35418 
// 3x2 scalar lrintf kernel, m = 3, ks(3): tests n = 3 with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
35435 
// 3x2 scalar lrintf kernel, m = 3, ks(3): tests n in {4, 6} with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
35452 
// 3x2 scalar lrintf kernel with cm_stride(5): tests k in {1, 3, 5}, n in [1, 2]
// and m in [1, 3], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(5);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35472 
// 3x2 scalar lrintf kernel, full 3x2 shape with ks(3) and a_offset(17):
// tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_size);
    tester.ks(3);
    tester.a_offset(17);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
35488 
// 3x2 scalar lrintf kernel, full 3x2 shape with ks(3) and a_offset(17):
// tests k in {1, 3, 5} crossed with zero_index in [0, 2].
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t mz_index = 0; mz_index < 3; ++mz_index) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(2).k(k_size);
      tester.ks(3);
      tester.a_offset(17);
      tester.zero_index(mz_index);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
35507 
// 3x2 scalar lrintf kernel, full 3x2 shape, k = 1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
35520 
// 3x2 scalar lrintf kernel, full 3x2 shape, k = 1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
35533 
// 3x2 scalar lrintf kernel, full 3x2 shape, k = 1, with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
35546 
// 3x2 scalar lrintf kernel, full 3x2 shape with a_zero_point(0):
// tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_size);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
35561 
// 3x2 scalar lrintf kernel, full 3x2 shape with b_zero_point(0):
// tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_size);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
35576 
// 3x2 scalar lrintf kernel, full 3x2 shape with a_zero_point(0) and
// b_zero_point(0): tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_size);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
35592 
35593 
// 3x4 scalar fmagic kernel, full 3x4 shape, k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
35605 
// 3x4 scalar fmagic kernel, full 3x4 shape, k = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
35618 
// 3x4 scalar fmagic kernel, k = 1: tests every (m, n) with m in [1, 3] and
// n in [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1);
      tester.iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
35635 
// 3x4 scalar fmagic kernel, k = 1, n = 4: tests m in [1, 3] with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(m_size).n(4).k(1);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
35650 
// 3x4 scalar fmagic kernel, k = 1, m = 3: tests n in [1, 4] with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(n_size).k(1);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
35665 
// 3x4 scalar fmagic kernel, full 3x4 shape: tests k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
35679 
// 3x4 scalar fmagic kernel: tests k in [2, 9] crossed with every (m, n),
// m in [1, 3] and n in [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35698 
// 3x4 scalar fmagic kernel, m = 3: tests n in [5, 7] with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
35714 
// 3x4 scalar fmagic kernel, m = 3, cn_stride(7): tests n in [5, 7] with
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
35731 
// 3x4 scalar fmagic kernel: tests n in [5, 7], k in {1, 3, 5} and m in [1, 3],
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35750 
// 3x4 scalar fmagic kernel, m = 3: tests n in {8, 12} with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
35766 
// 3x4 scalar fmagic kernel, m = 3, cn_stride(7): tests n in {8, 12} with
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
35783 
// 3x4 scalar fmagic kernel: tests n in {8, 12}, k in {1, 3, 5} and m in [1, 3],
// using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35802 
// 3x4 scalar fmagic kernel, full 3x4 shape with ks(3): tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
35817 
// 3x4 scalar fmagic kernel with ks(3): tests k in {1, 3, 5}, n in [1, 4] and
// m in [1, 3], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35837 
// 3x4 scalar fmagic kernel, m = 3, ks(3): tests n in [5, 7] with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
35854 
// 3x4 scalar fmagic kernel, m = 3, ks(3): tests n in {8, 12} with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
35871 
// 3x4 scalar fmagic kernel with cm_stride(7): tests k in {1, 3, 5}, n in [1, 4]
// and m in [1, 3], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
35891 
// 3x4 scalar fmagic kernel, full 3x4 shape with ks(3) and a_offset(17):
// tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.a_offset(17);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
35907 
// 3x4 scalar fmagic kernel, full 3x4 shape with ks(3) and a_offset(17):
// tests k in {1, 3, 5} crossed with zero_index in [0, 2].
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t mz_index = 0; mz_index < 3; ++mz_index) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(4).k(k_size);
      tester.ks(3);
      tester.a_offset(17);
      tester.zero_index(mz_index);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
35926 
// 3x4 scalar fmagic kernel, full 3x4 shape, k = 1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
35939 
// 3x4 scalar fmagic kernel, full 3x4 shape, k = 1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
35952 
// 3x4 scalar fmagic kernel, full 3x4 shape, k = 1, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
35965 
// 3x4 scalar fmagic kernel, full 3x4 shape with a_zero_point(0):
// tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
35980 
// 3x4 scalar fmagic kernel, full 3x4 shape with b_zero_point(0):
// tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
35995 
// 3x4 scalar fmagic kernel, full 3x4 shape with a_zero_point(0) and
// b_zero_point(0): tests k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36011 
36012 
// 3x4 scalar lrintf kernel, full 3x4 shape, k = 1.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36024 
// 3x4 scalar lrintf kernel, full 3x4 shape, k = 1, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36037 
// 3x4 scalar lrintf kernel, k = 1: tests every (m, n) with m in [1, 3] and
// n in [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1);
      tester.iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36054 
// 3x4 scalar lrintf kernel, k = 1, n = 4: tests m in [1, 3] with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(m_size).n(4).k(1);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36069 
// 3x4 scalar lrintf kernel, k = 1, m = 3: tests n in [1, 4] with iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(n_size).k(1);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36084 
// 3x4 scalar lrintf kernel, full 3x4 shape: tests k in [2, 9].
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36098 
// 3x4 scalar lrintf kernel: tests k in [2, 9] crossed with every (m, n),
// m in [1, 3] and n in [1, 4], using iterations(1).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36117 
// 3x4 scalar lrintf kernel, m = 3: tests n in [5, 7] with k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36133 
// 3x4 scalar lrintf kernel, m = 3, cn_stride(7): tests n in [5, 7] with
// k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36150 
// 3x4 scalar-lrintf ukernel: n in 5..7, k in {1, 3, 5}, every m in 1..3,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36169 
// 3x4 scalar-lrintf ukernel: n in {8, 12} (multiples of nr=4), m=3, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36185 
// 3x4 scalar-lrintf ukernel: n in {8, 12}, m=3, k in {1, 3, 5}, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36202 
// 3x4 scalar-lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, every m in 1..3,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36221 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36236 
// 3x4 scalar-lrintf ukernel: every m in 1..3 and n in 1..4 for k in {1, 3, 5},
// with ks(3) and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36256 
// 3x4 scalar-lrintf ukernel: n in 5..7, m=3, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36273 
// 3x4 scalar-lrintf ukernel: n in {8, 12}, m=3, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36290 
// 3x4 scalar-lrintf ukernel: every m in 1..3 and n in 1..4 for k in {1, 3, 5},
// with cm_stride(7) and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36310 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k in {1, 3, 5},
// with ks(3) and a_offset(17).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36326 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k in {1, 3, 5}, ks(3) and
// a_offset(17), with zero_index swept over 0..2 (one value per row of mr=3).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36345 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k=1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36358 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k=1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36371 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k=1, with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36384 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k in {1, 3, 5},
// with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36399 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k in {1, 3, 5},
// with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36414 
// 3x4 scalar-lrintf ukernel: full tile (m=3, n=4), k in {1, 3, 5},
// with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36430 
36431 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k=1.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36443 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k=1, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36456 
// 4x2 scalar-fmagic ukernel: every m in 1..4 and n in 1..2 at k=1,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36473 
// 4x2 scalar-fmagic ukernel: every m in 1..4 with n=2 and k=1,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36488 
// 4x2 scalar-fmagic ukernel: every n in 1..2 with m=4 and k=1,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36503 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k swept over 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36517 
// 4x2 scalar-fmagic ukernel: every m in 1..4 and n in 1..2 for k in 2..9,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36536 
// 4x2 scalar-fmagic ukernel: n=3 (the only value in [3, 4)), m=4, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36552 
// 4x2 scalar-fmagic ukernel: n=3, m=4, k in {1, 3, 5}, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36569 
// 4x2 scalar-fmagic ukernel: n=3, k in {1, 3, 5}, every m in 1..4,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36588 
// 4x2 scalar-fmagic ukernel: n in {4, 6} (multiples of nr=2), m=4, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36604 
// 4x2 scalar-fmagic ukernel: n in {4, 6}, m=4, k in {1, 3, 5}, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36621 
// 4x2 scalar-fmagic ukernel: n in {4, 6}, k in {1, 3, 5}, every m in 1..4,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36640 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36655 
// 4x2 scalar-fmagic ukernel: every m in 1..4 and n in 1..2 for k in {1, 3, 5},
// with ks(3) and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36675 
// 4x2 scalar-fmagic ukernel: n=3, m=4, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36692 
// 4x2 scalar-fmagic ukernel: n in {4, 6}, m=4, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36709 
// 4x2 scalar-fmagic ukernel: every m in 1..4 and n in 1..2 for k in {1, 3, 5},
// with cm_stride(5) and iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36729 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k in {1, 3, 5},
// with ks(3) and a_offset(23).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36745 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k in {1, 3, 5}, ks(3) and
// a_offset(23), with zero_index swept over 0..3 (one value per row of mr=4).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
36764 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k=1, with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36777 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k=1, with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36790 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k=1, with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
36803 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k in {1, 3, 5},
// with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36818 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k in {1, 3, 5},
// with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36833 
// 4x2 scalar-fmagic ukernel: full tile (m=4, n=2), k in {1, 3, 5},
// with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
36849 
36850 
// 4x2 scalar-lrintf ukernel: full tile (m=4, n=2), k=1.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36862 
// 4x2 scalar-lrintf ukernel: full tile (m=4, n=2), k=1, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
36875 
// 4x2 scalar-lrintf ukernel: every m in 1..4 and n in 1..2 at k=1,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36892 
// 4x2 scalar-lrintf ukernel: every m in 1..4 with n=2 and k=1,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36907 
// 4x2 scalar-lrintf ukernel: every n in 1..2 with m=4 and k=1,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36922 
// 4x2 scalar-lrintf ukernel: full tile (m=4, n=2), k swept over 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
36936 
// 4x2 scalar-lrintf ukernel: every m in 1..4 and n in 1..2 for k in 2..9,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
36955 
// 4x2 scalar-lrintf ukernel: n=3 (the only value in [3, 4)), m=4, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36971 
// 4x2 scalar-lrintf ukernel: n=3, m=4, k in {1, 3, 5}, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
36988 
// 4x2 scalar-lrintf ukernel: n=3, k in {1, 3, 5}, every m in 1..4,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
      }
    }
  }
}
37007 
// 4x2 scalar-lrintf ukernel: n in {4, 6} (multiples of nr=2), m=4, k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
37023 
// 4x2 scalar-lrintf ukernel: n in {4, 6}, m=4, k in {1, 3, 5}, with cn_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
37040 
// 4x2 scalar-lrintf ukernel: n in {4, 6}, k in {1, 3, 5}, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37059 
// 4x2 scalar-lrintf ukernel: m = 4, n = 2, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37074 
// 4x2 scalar-lrintf ukernel: k in {1, 3, 5}, n in 1..2, m in 1..4, with ks(3), one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37094 
// 4x2 scalar-lrintf ukernel: n = 3, k in {1, 3, 5}, m = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37111 
// 4x2 scalar-lrintf ukernel: n in {4, 6}, k in {1, 3, 5}, m = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37128 
// 4x2 scalar-lrintf ukernel: k in {1, 3, 5}, n in 1..2, m in 1..4, with cm_stride(5), one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37148 
// 4x2 scalar-lrintf ukernel: m = 4, n = 2, k in {1, 3, 5}, with ks(3) and a_offset(23).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_size)
      .ks(3)
      .a_offset(23)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37164 
// 4x2 scalar-lrintf ukernel: k in {1, 3, 5} and zero_index in 0..3, with ks(3) and a_offset(23).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37183 
// 4x2 scalar-lrintf ukernel: single m = 4, n = 2, k = 1 case with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
37196 
// 4x2 scalar-lrintf ukernel: single m = 4, n = 2, k = 1 case with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
37209 
// 4x2 scalar-lrintf ukernel: single m = 4, n = 2, k = 1 case with cm_stride(5).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
37222 
// 4x2 scalar-lrintf ukernel: m = 4, n = 2, k in {1, 3, 5}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37237 
// 4x2 scalar-lrintf ukernel: m = 4, n = 2, k in {1, 3, 5}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37252 
// 4x2 scalar-lrintf ukernel: m = 4, n = 2, k in {1, 3, 5}, with a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37268 
37269 
// 4x4 scalar-fmagic ukernel: single m = 4, n = 4, k = 1 case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37281 
// 4x4 scalar-fmagic ukernel: single m = 4, n = 4, k = 1 case with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37294 
// 4x4 scalar-fmagic ukernel: k = 1, n in 1..4, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37311 
// 4x4 scalar-fmagic ukernel: k = 1, n = 4, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(m_size).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37326 
// 4x4 scalar-fmagic ukernel: k = 1, m = 4, n in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(n_size).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37341 
// 4x4 scalar-fmagic ukernel: m = 4, n = 4, k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37355 
// 4x4 scalar-fmagic ukernel: k in 2..9, n in 1..4, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37374 
// 4x4 scalar-fmagic ukernel: n in 5..7, k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37390 
// 4x4 scalar-fmagic ukernel: n in 5..7, k in {1, 3, 5}, m = 4, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37407 
// 4x4 scalar-fmagic ukernel: n in 5..7, k in {1, 3, 5}, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37426 
// 4x4 scalar-fmagic ukernel: n in {8, 12}, k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37442 
// 4x4 scalar-fmagic ukernel: n in {8, 12}, k in {1, 3, 5}, m = 4, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37459 
// 4x4 scalar-fmagic ukernel: n in {8, 12}, k in {1, 3, 5}, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37478 
// 4x4 scalar-fmagic ukernel: m = 4, n = 4, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37493 
// 4x4 scalar-fmagic ukernel: k in {1, 3, 5}, n in 1..4, m in 1..4, with ks(3), one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37513 
// 4x4 scalar-fmagic ukernel: n in 5..7, k in {1, 3, 5}, m = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37530 
// 4x4 scalar-fmagic ukernel: n in {8, 12}, k in {1, 3, 5}, m = 4, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37547 
// 4x4 scalar-fmagic ukernel: k in {1, 3, 5}, n in 1..4, m in 1..4, with cm_stride(7), one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37567 
// 4x4 scalar-fmagic ukernel: m = 4, n = 4, k in {1, 3, 5}, with ks(3) and a_offset(23).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(23)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37583 
// 4x4 scalar-fmagic ukernel: k in {1, 3, 5} and zero_index in 0..3, with ks(3) and a_offset(23).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37602 
// 4x4 scalar-fmagic ukernel: single m = 4, n = 4, k = 1 case with qmin(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37615 
// 4x4 scalar-fmagic ukernel: single m = 4, n = 4, k = 1 case with qmax(128).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37628 
// 4x4 scalar-fmagic ukernel: single m = 4, n = 4, k = 1 case with cm_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
37641 
// 4x4 scalar-fmagic ukernel: m = 4, n = 4, k in {1, 3, 5}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, no_a_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37656 
// 4x4 scalar-fmagic ukernel: m = 4, n = 4, k in {1, 3, 5}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, no_b_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37671 
// 4x4 scalar-fmagic ukernel: m = 4, n = 4, k in {1, 3, 5}, with a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, no_zero_point) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
37687 
37688 
// 4x4 scalar-lrintf ukernel: single m = 4, n = 4, k = 1 case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
37700 
// 4x4 scalar-lrintf ukernel: single m = 4, n = 4, k = 1 case with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
37713 
// 4x4 scalar-lrintf ukernel: k = 1, n in 1..4, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37730 
// 4x4 scalar-lrintf ukernel: k = 1, n = 4, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(m_size).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37745 
// 4x4 scalar-lrintf ukernel: k = 1, m = 4, n in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(n_size).k(1)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37760 
// 4x4 scalar-lrintf ukernel: m = 4, n = 4, k in 2..9.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37774 
// 4x4 scalar-lrintf ukernel: k in 2..9, n in 1..4, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37793 
// 4x4 scalar-lrintf ukernel: n in 5..7, k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37809 
// 4x4 scalar-lrintf ukernel: n in 5..7, k in {1, 3, 5}, m = 4, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37826 
// 4x4 scalar-lrintf ukernel: n in 5..7, k in {1, 3, 5}, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37845 
// 4x4 scalar-lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, m = 4.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37861 
// 4x4 scalar-lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, m = 4, with cn_stride(7).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37878 
// 4x4 scalar-lrintf ukernel: n in {8, 12}, k in {1, 3, 5}, m in 1..4, one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37897 
// 4x4 scalar-lrintf ukernel: m = 4, n = 4, k in {1, 3, 5}, with ks(3).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
37912 
// 4x4 scalar-lrintf ukernel: k in {1, 3, 5}, n in 1..4, m in 1..4, with ks(3), one iteration per case.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37932 
// n_gt_4_small_kernel: ks set to 3, m=4, n in {5, 6, 7} (above nr=4 but
// below 2*nr), and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37949 
// n_div_4_small_kernel: ks set to 3, m=4, n in {8, 12} (multiples of nr=4),
// and k in {1, 3, 5}.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
37966 
// strided_cm_subtile: cm_stride set to 7, k in {1, 3, 5}, and every (m, n)
// pair with m and n each from 1 to 4, running a single iteration per
// configuration.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
      }
    }
  }
}
37986 
// a_offset: full 4x4 tile, k in {1, 3, 5}, ks set to 3 and a_offset set to 23.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.a_offset(23);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
38002 
// zero: full 4x4 tile, k in {1, 3, 5}, ks set to 3, a_offset set to 23, and
// zero_index swept over 0..3 (one value per row of the mr=4 tile).
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(kc);
      tester.ks(3);
      tester.a_offset(23);
      tester.zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
38021 
// qmin: single full 4x4 tile with k=1 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
38034 
// qmax: single full 4x4 tile with k=1 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
38047 
// strided_cm: single full 4x4 tile with k=1 and cm_stride set to 7.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
38060 
// no_a_zero_point: full 4x4 tile, k in {1, 3, 5}, with a_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_a_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
38075 
// no_b_zero_point: full 4x4 tile, k in {1, 3, 5}, with b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_b_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
38090 
// no_zero_point: full 4x4 tile, k in {1, 3, 5}, with both a_zero_point and
// b_zero_point set to 0.
TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_zero_point) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(kc);
    tester.a_zero_point(0);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
38106