xref: /aosp_15_r20/external/XNNPACK/test/qs8-igemm-minmax-fp32.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 //   Specification: test/qs8-igemm-minmax-fp32.yaml
11 //   Generator: tools/generate-gemm-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20 
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25 
26 
27 #if XNN_ARCH_ARM
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Single m=1, n=1 problem with k fixed at 4, the same value passed to kr().
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
40 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // m=1, n=1, k=4 problem with cn_stride overridden to 3.
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .cn_stride(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
54 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweep n in [1, 1] and m in [1, 1] at k=4, one tester iteration per pair.
  for (uint32_t n = 1; n <= 1; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
72 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweep m in [1, 1] with n=1 and k=4, one tester iteration per value.
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(m).n(1).k(4)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
88 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweep n in [1, 1] with m=1 and k=4, one tester iteration per value.
  for (uint32_t n = 1; n <= 1; ++n) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(n).k(4)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
104 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // k takes every value below 4 (1, 2, 3) on an m=1, n=1 problem.
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
119 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k in {1, 2, 3}: sweep n in [1, 1] and m in [1, 1], one iteration each.
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
139 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // k takes the values strictly between 4 and 8 (5, 6, 7) with m=1, n=1.
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
154 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k in {5, 6, 7}: sweep n in [1, 1] and m in [1, 1], one iteration each.
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
174 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // k walks the multiples of 4 from 8 through 40 with m=1, n=1.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
189 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For each multiple of 4 in [8, 40]: sweep n in [1, 1] and m in [1, 1].
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
209 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bound was `n < 2`, which with a start of 2 made the loop
  // body unreachable: the test reported success without ever invoking the
  // microkernel. The bound is widened to `n < 3` so the smallest n above
  // nr (n = 2) is actually exercised for k in {1, 6, 11, 16}.
  // NOTE(review): this file is generated from test/qs8-igemm-minmax-fp32.yaml
  // by tools/generate-gemm-test.py; the generator needs the same change or
  // this fix is lost on regeneration.
  for (uint32_t n = 2; n < 3; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
226 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bound was `n < 2`, which with a start of 2 made the loop
  // body unreachable, so the test passed vacuously. Widened to `n < 3` so
  // n = 2 is run with cn_stride = 3 for k in {1, 6, 11, 16}.
  // NOTE(review): generated file; mirror this in tools/generate-gemm-test.py.
  for (uint32_t n = 2; n < 3; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
244 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bound was `n < 2`, which with a start of 2 made the loop
  // body unreachable, so the test passed vacuously. Widened to `n < 3` so
  // n = 2 is run for k in {1, 6, 11, 16} and every m in [1, 1].
  // NOTE(review): generated file; mirror this in tools/generate-gemm-test.py.
  for (uint32_t n = 2; n < 3; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
264 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3} crossed with k in {1, 6, 11, 16}; m stays at 1.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
281 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3} crossed with k in {1, 6, 11, 16}, with cn_stride set to 3.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
299 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3}, k in {1, 6, 11, 16}, m in [1, 1]; one iteration per combination.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
319 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16} on an m=1, n=1 problem with ks set to 3.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
335 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16}, n in [1, 1], m in [1, 1], with ks=3 and one iteration each.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
356 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bound was `n < 2`, which with a start of 2 made the loop
  // body unreachable, so the test passed vacuously. Widened to `n < 3` so
  // n = 2 is run with ks = 3 for k in {1, 6, 11, 16}.
  // NOTE(review): generated file; mirror this in tools/generate-gemm-test.py.
  for (uint32_t n = 2; n < 3; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
374 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3} crossed with k in {1, 6, 11, 16}, with ks set to 3.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
392 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16}, n in [1, 1], m in [1, 1], with cm_stride=3 and one iteration each.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
413 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16} with ks=3 and a_offset set to 23.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(k)
        .ks(3)
        .a_offset(23)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
430 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16} with ks=3 and a_offset=23; zero_index takes each mz in [0, 1).
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(1).k(k)
          .ks(3)
          .a_offset(23)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
450 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  // m=1, n=1, k=4 problem with qmin set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
464 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  // m=1, n=1, k=4 problem with qmax set to 128.
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
478 
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  // m=1, n=1, k=4 problem with cm_stride overridden to 3.
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .cm_stride(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
492 #endif  // XNN_ARCH_ARM
493 
494 
495 #if XNN_ARCH_ARM
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // Single m=2, n=1 problem with k fixed at 4, the same value passed to kr().
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
508 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // m=2, n=1, k=4 problem with cn_stride overridden to 3.
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cn_stride(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
522 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweep n in [1, 1] and m in [1, 2] at k=4, one tester iteration per pair.
  for (uint32_t n = 1; n <= 1; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
540 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweep m in [1, 2] with n=1 and k=4, one tester iteration per value.
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(m).n(1).k(4)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
556 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  // Sweep n in [1, 1] with m=2 and k=4, one tester iteration per value.
  for (uint32_t n = 1; n <= 1; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(n).k(4)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
572 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // k takes every value below 4 (1, 2, 3) on an m=2, n=1 problem.
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
587 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k in {1, 2, 3}: sweep n in [1, 1] and m in [1, 2], one iteration each.
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
607 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // k takes the values strictly between 4 and 8 (5, 6, 7) with m=2, n=1.
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
622 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For k in {5, 6, 7}: sweep n in [1, 1] and m in [1, 2], one iteration each.
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
642 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  // k walks the multiples of 4 from 8 through 40 with m=2, n=1.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
657 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // For each multiple of 4 in [8, 40]: sweep n in [1, 1] and m in [1, 2].
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
677 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bound was `n < 2`, which with a start of 2 made the loop
  // body unreachable: the test reported success without ever invoking the
  // microkernel. The bound is widened to `n < 3` so the smallest n above
  // nr (n = 2) is actually exercised for k in {1, 6, 11, 16}.
  // NOTE(review): this file is generated from test/qs8-igemm-minmax-fp32.yaml
  // by tools/generate-gemm-test.py; the generator needs the same change or
  // this fix is lost on regeneration.
  for (uint32_t n = 2; n < 3; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
694 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bound was `n < 2`, which with a start of 2 made the loop
  // body unreachable, so the test passed vacuously. Widened to `n < 3` so
  // n = 2 is run with cn_stride = 3 for k in {1, 6, 11, 16}.
  // NOTE(review): generated file; mirror this in tools/generate-gemm-test.py.
  for (uint32_t n = 2; n < 3; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
712 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bound was `n < 2`, which with a start of 2 made the loop
  // body unreachable, so the test passed vacuously. Widened to `n < 3` so
  // n = 2 is run for k in {1, 6, 11, 16} and every m in [1, 2].
  // NOTE(review): generated file; mirror this in tools/generate-gemm-test.py.
  for (uint32_t n = 2; n < 3; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
732 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3} crossed with k in {1, 6, 11, 16}; m stays at 2.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
749 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3} crossed with k in {1, 6, 11, 16}, with cn_stride set to 3.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
767 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3}, k in {1, 6, 11, 16}, m in [1, 2]; one iteration per combination.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
787 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16} on an m=2, n=1 problem with ks set to 3.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
803 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16}, n in [1, 1], m in [1, 2], with ks=3 and one iteration each.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
824 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // The generated bound was `n < 2`, which with a start of 2 made the loop
  // body unreachable, so the test passed vacuously. Widened to `n < 3` so
  // n = 2 is run with ks = 3 for k in {1, 6, 11, 16}.
  // NOTE(review): generated file; mirror this in tools/generate-gemm-test.py.
  for (uint32_t n = 2; n < 3; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
842 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  // n in {2, 3} crossed with k in {1, 6, 11, 16}, with ks set to 3.
  for (uint32_t n = 2; n <= 3; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
860 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16}, n in [1, 1], m in [1, 2], with cm_stride=3 and one iteration each.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
881 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16} with ks=3 and a_offset set to 43.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
898 
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  // k in {1, 6, 11, 16} with ks=3 and a_offset=43; zero_index takes each mz in [0, 2).
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(1).k(k)
          .ks(3)
          .a_offset(43)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
918 
// Runs the 2x1c4 ARMSIMD32 QS8 IGEMM microkernel on the full 2x1 tile with
// k = 4 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32,qmin)919   TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmin) {
920     TEST_REQUIRES_ARM_SIMD32;
921     GemmMicrokernelTester()
922       .mr(2)
923       .nr(1)
924       .kr(4)
925       .sr(1)
926       .m(2)
927       .n(1)
928       .k(4)
929       .qmin(128)
930       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qs8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
931   }
932 
// Runs the 2x1c4 ARMSIMD32 QS8 IGEMM microkernel on the full 2x1 tile with
// k = 4 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32,qmax)933   TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmax) {
934     TEST_REQUIRES_ARM_SIMD32;
935     GemmMicrokernelTester()
936       .mr(2)
937       .nr(1)
938       .kr(4)
939       .sr(1)
940       .m(2)
941       .n(1)
942       .k(4)
943       .qmax(128)
944       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qs8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
945   }
946 
// Runs the 2x1c4 ARMSIMD32 QS8 IGEMM microkernel on the full 2x1 tile with
// k = 4 and cm_stride(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32,strided_cm)947   TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm) {
948     TEST_REQUIRES_ARM_SIMD32;
949     GemmMicrokernelTester()
950       .mr(2)
951       .nr(1)
952       .kr(4)
953       .sr(1)
954       .m(2)
955       .n(1)
956       .k(4)
957       .cm_stride(3)
958       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qs8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
959   }
960 #endif  // XNN_ARCH_ARM
961 
962 
963 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case for the 1x8c2 NEON MLAL DUP QS8 IGEMM microkernel: full 1x8
// tile with k = 16 and all other tester settings left at their defaults.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_eq_16)964   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16) {
965     TEST_REQUIRES_ARM_NEON;
966     GemmMicrokernelTester()
967       .mr(1)
968       .nr(8)
969       .kr(2)
970       .sr(1)
971       .m(1)
972       .n(8)
973       .k(16)
974       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
975   }
976 
// Full 1x8 tile with k = 16 and cn_stride(11) on the 1x8c2 NEON MLAL DUP
// QS8 IGEMM microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,strided_cn)977   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cn) {
978     TEST_REQUIRES_ARM_NEON;
979     GemmMicrokernelTester()
980       .mr(1)
981       .nr(8)
982       .kr(2)
983       .sr(1)
984       .m(1)
985       .n(8)
986       .k(16)
987       .cn_stride(11)
988       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
989   }
990 
// Sweeps every sub-tile (m = 1, n = 1..8) at k = 16 on the 1x8c2 NEON MLAL
// DUP microkernel, one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_eq_16_subtile)991   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
992     TEST_REQUIRES_ARM_NEON;
993     for (uint32_t n = 1; n <= 8; n++) {
994       for (uint32_t m = 1; m <= 1; m++) {
995         GemmMicrokernelTester()
996           .mr(1)
997           .nr(8)
998           .kr(2)
999           .sr(1)
1000           .m(m)
1001           .n(n)
1002           .k(16)
1003           .iterations(1)
1004           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1005       }
1006     }
1007   }
1008 
// Sweeps m over 1..mr (only m = 1, since mr is 1) with n = 8 and k = 16 on
// the 1x8c2 NEON MLAL DUP microkernel, one tester iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_eq_16_subtile_m)1009   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
1010     TEST_REQUIRES_ARM_NEON;
1011     for (uint32_t m = 1; m <= 1; m++) {
1012       GemmMicrokernelTester()
1013         .mr(1)
1014         .nr(8)
1015         .kr(2)
1016         .sr(1)
1017         .m(m)
1018         .n(8)
1019         .k(16)
1020         .iterations(1)
1021         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1022     }
1023   }
1024 
// Sweeps n = 1..8 with m = 1 and k = 16 on the 1x8c2 NEON MLAL DUP
// microkernel, one tester iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_eq_16_subtile_n)1025   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
1026     TEST_REQUIRES_ARM_NEON;
1027     for (uint32_t n = 1; n <= 8; n++) {
1028       GemmMicrokernelTester()
1029         .mr(1)
1030         .nr(8)
1031         .kr(2)
1032         .sr(1)
1033         .m(1)
1034         .n(n)
1035         .k(16)
1036         .iterations(1)
1037         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1038     }
1039   }
1040 
// Full 1x8 tile for every k in 1..15 (all values below 16) on the 1x8c2
// NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_lt_16)1041   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16) {
1042     TEST_REQUIRES_ARM_NEON;
1043     for (size_t k = 1; k < 16; k++) {
1044       GemmMicrokernelTester()
1045         .mr(1)
1046         .nr(8)
1047         .kr(2)
1048         .sr(1)
1049         .m(1)
1050         .n(8)
1051         .k(k)
1052         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1053     }
1054   }
1055 
// Every sub-tile (m = 1, n = 1..8) for every k in 1..15 on the 1x8c2 NEON
// MLAL DUP microkernel, one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_lt_16_subtile)1056   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
1057     TEST_REQUIRES_ARM_NEON;
1058     for (size_t k = 1; k < 16; k++) {
1059       for (uint32_t n = 1; n <= 8; n++) {
1060         for (uint32_t m = 1; m <= 1; m++) {
1061           GemmMicrokernelTester()
1062             .mr(1)
1063             .nr(8)
1064             .kr(2)
1065             .sr(1)
1066             .m(m)
1067             .n(n)
1068             .k(k)
1069             .iterations(1)
1070             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1071         }
1072       }
1073     }
1074   }
1075 
// Full 1x8 tile for every k in 17..31 (strictly between 16 and 32) on the
// 1x8c2 NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_gt_16)1076   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16) {
1077     TEST_REQUIRES_ARM_NEON;
1078     for (size_t k = 17; k < 32; k++) {
1079       GemmMicrokernelTester()
1080         .mr(1)
1081         .nr(8)
1082         .kr(2)
1083         .sr(1)
1084         .m(1)
1085         .n(8)
1086         .k(k)
1087         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1088     }
1089   }
1090 
// Every sub-tile (m = 1, n = 1..8) for every k in 17..31 on the 1x8c2 NEON
// MLAL DUP microkernel, one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_gt_16_subtile)1091   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
1092     TEST_REQUIRES_ARM_NEON;
1093     for (size_t k = 17; k < 32; k++) {
1094       for (uint32_t n = 1; n <= 8; n++) {
1095         for (uint32_t m = 1; m <= 1; m++) {
1096           GemmMicrokernelTester()
1097             .mr(1)
1098             .nr(8)
1099             .kr(2)
1100             .sr(1)
1101             .m(m)
1102             .n(n)
1103             .k(k)
1104             .iterations(1)
1105             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1106         }
1107       }
1108     }
1109   }
1110 
// Full 1x8 tile for k = 32, 48, ..., 160 (multiples of 16) on the 1x8c2
// NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_div_16)1111   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16) {
1112     TEST_REQUIRES_ARM_NEON;
1113     for (size_t k = 32; k <= 160; k += 16) {
1114       GemmMicrokernelTester()
1115         .mr(1)
1116         .nr(8)
1117         .kr(2)
1118         .sr(1)
1119         .m(1)
1120         .n(8)
1121         .k(k)
1122         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1123     }
1124   }
1125 
// Every sub-tile (m = 1, n = 1..8) for k = 32, 48, ..., 160 on the 1x8c2
// NEON MLAL DUP microkernel, one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,k_div_16_subtile)1126   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
1127     TEST_REQUIRES_ARM_NEON;
1128     for (size_t k = 32; k <= 160; k += 16) {
1129       for (uint32_t n = 1; n <= 8; n++) {
1130         for (uint32_t m = 1; m <= 1; m++) {
1131           GemmMicrokernelTester()
1132             .mr(1)
1133             .nr(8)
1134             .kr(2)
1135             .sr(1)
1136             .m(m)
1137             .n(n)
1138             .k(k)
1139             .iterations(1)
1140             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1141         }
1142       }
1143     }
1144   }
1145 
// n = 9..15 (larger than nr = 8, smaller than 2 * nr) with m = 1 and
// k = 1, 18, 35, 52, 69 on the 1x8c2 NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,n_gt_8)1146   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8) {
1147     TEST_REQUIRES_ARM_NEON;
1148     for (uint32_t n = 9; n < 16; n++) {
1149       for (size_t k = 1; k <= 80; k += 17) {
1150         GemmMicrokernelTester()
1151           .mr(1)
1152           .nr(8)
1153           .kr(2)
1154           .sr(1)
1155           .m(1)
1156           .n(n)
1157           .k(k)
1158           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1159       }
1160     }
1161   }
1162 
// n = 9..15 with m = 1, k = 1, 18, 35, 52, 69 and cn_stride(11) on the
// 1x8c2 NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,n_gt_8_strided_cn)1163   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
1164     TEST_REQUIRES_ARM_NEON;
1165     for (uint32_t n = 9; n < 16; n++) {
1166       for (size_t k = 1; k <= 80; k += 17) {
1167         GemmMicrokernelTester()
1168           .mr(1)
1169           .nr(8)
1170           .kr(2)
1171           .sr(1)
1172           .m(1)
1173           .n(n)
1174           .k(k)
1175           .cn_stride(11)
1176           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1177       }
1178     }
1179   }
1180 
// n = 9..15 crossed with k = 1, 18, 35, 52, 69 and m = 1..mr (only m = 1)
// on the 1x8c2 NEON MLAL DUP microkernel, one tester iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,n_gt_8_subtile)1181   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
1182     TEST_REQUIRES_ARM_NEON;
1183     for (uint32_t n = 9; n < 16; n++) {
1184       for (size_t k = 1; k <= 80; k += 17) {
1185         for (uint32_t m = 1; m <= 1; m++) {
1186           GemmMicrokernelTester()
1187             .mr(1)
1188             .nr(8)
1189             .kr(2)
1190             .sr(1)
1191             .m(m)
1192             .n(n)
1193             .k(k)
1194             .iterations(1)
1195             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1196         }
1197       }
1198     }
1199   }
1200 
// n = 16 and 24 (multiples of nr = 8) with m = 1 and k = 1, 18, 35, 52, 69
// on the 1x8c2 NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,n_div_8)1201   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8) {
1202     TEST_REQUIRES_ARM_NEON;
1203     for (uint32_t n = 16; n <= 24; n += 8) {
1204       for (size_t k = 1; k <= 80; k += 17) {
1205         GemmMicrokernelTester()
1206           .mr(1)
1207           .nr(8)
1208           .kr(2)
1209           .sr(1)
1210           .m(1)
1211           .n(n)
1212           .k(k)
1213           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1214       }
1215     }
1216   }
1217 
// n = 16 and 24 with m = 1, k = 1, 18, 35, 52, 69 and cn_stride(11) on the
// 1x8c2 NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,n_div_8_strided_cn)1218   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
1219     TEST_REQUIRES_ARM_NEON;
1220     for (uint32_t n = 16; n <= 24; n += 8) {
1221       for (size_t k = 1; k <= 80; k += 17) {
1222         GemmMicrokernelTester()
1223           .mr(1)
1224           .nr(8)
1225           .kr(2)
1226           .sr(1)
1227           .m(1)
1228           .n(n)
1229           .k(k)
1230           .cn_stride(11)
1231           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1232       }
1233     }
1234   }
1235 
// n = 16 and 24 crossed with k = 1, 18, 35, 52, 69 and m = 1..mr (only
// m = 1) on the 1x8c2 NEON MLAL DUP microkernel, one tester iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,n_div_8_subtile)1236   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
1237     TEST_REQUIRES_ARM_NEON;
1238     for (uint32_t n = 16; n <= 24; n += 8) {
1239       for (size_t k = 1; k <= 80; k += 17) {
1240         for (uint32_t m = 1; m <= 1; m++) {
1241           GemmMicrokernelTester()
1242             .mr(1)
1243             .nr(8)
1244             .kr(2)
1245             .sr(1)
1246             .m(m)
1247             .n(n)
1248             .k(k)
1249             .iterations(1)
1250             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1251         }
1252       }
1253     }
1254   }
1255 
// Full 1x8 tile with ks(3) for k = 1, 18, 35, 52, 69 on the 1x8c2 NEON MLAL
// DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,small_kernel)1256   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, small_kernel) {
1257     TEST_REQUIRES_ARM_NEON;
1258     for (size_t k = 1; k <= 80; k += 17) {
1259       GemmMicrokernelTester()
1260         .mr(1)
1261         .nr(8)
1262         .kr(2)
1263         .sr(1)
1264         .m(1)
1265         .n(8)
1266         .k(k)
1267         .ks(3)
1268         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1269     }
1270   }
1271 
// Every sub-tile (m = 1, n = 1..8) with ks(3) for k = 1, 18, 35, 52, 69 on
// the 1x8c2 NEON MLAL DUP microkernel, one tester iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,small_kernel_subtile)1272   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
1273     TEST_REQUIRES_ARM_NEON;
1274     for (size_t k = 1; k <= 80; k += 17) {
1275       for (uint32_t n = 1; n <= 8; n++) {
1276         for (uint32_t m = 1; m <= 1; m++) {
1277           GemmMicrokernelTester()
1278             .mr(1)
1279             .nr(8)
1280             .kr(2)
1281             .sr(1)
1282             .m(m)
1283             .n(n)
1284             .k(k)
1285             .ks(3)
1286             .iterations(1)
1287             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1288         }
1289       }
1290     }
1291   }
1292 
// n = 9..15 with m = 1, ks(3) and k = 1, 18, 35, 52, 69 on the 1x8c2 NEON
// MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,n_gt_8_small_kernel)1293   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
1294     TEST_REQUIRES_ARM_NEON;
1295     for (uint32_t n = 9; n < 16; n++) {
1296       for (size_t k = 1; k <= 80; k += 17) {
1297         GemmMicrokernelTester()
1298           .mr(1)
1299           .nr(8)
1300           .kr(2)
1301           .sr(1)
1302           .m(1)
1303           .n(n)
1304           .k(k)
1305           .ks(3)
1306           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1307       }
1308     }
1309   }
1310 
// n = 16 and 24 with m = 1, ks(3) and k = 1, 18, 35, 52, 69 on the 1x8c2
// NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,n_div_8_small_kernel)1311   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
1312     TEST_REQUIRES_ARM_NEON;
1313     for (uint32_t n = 16; n <= 24; n += 8) {
1314       for (size_t k = 1; k <= 80; k += 17) {
1315         GemmMicrokernelTester()
1316           .mr(1)
1317           .nr(8)
1318           .kr(2)
1319           .sr(1)
1320           .m(1)
1321           .n(n)
1322           .k(k)
1323           .ks(3)
1324           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1325       }
1326     }
1327   }
1328 
// Every sub-tile (m = 1, n = 1..8) with cm_stride(11) for
// k = 1, 18, 35, 52, 69 on the 1x8c2 NEON MLAL DUP microkernel, one tester
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,strided_cm_subtile)1329   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
1330     TEST_REQUIRES_ARM_NEON;
1331     for (size_t k = 1; k <= 80; k += 17) {
1332       for (uint32_t n = 1; n <= 8; n++) {
1333         for (uint32_t m = 1; m <= 1; m++) {
1334           GemmMicrokernelTester()
1335             .mr(1)
1336             .nr(8)
1337             .kr(2)
1338             .sr(1)
1339             .m(m)
1340             .n(n)
1341             .k(k)
1342             .cm_stride(11)
1343             .iterations(1)
1344             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1345         }
1346       }
1347     }
1348   }
1349 
// Full 1x8 tile with ks(3) and a_offset(83) for k = 1, 18, 35, 52, 69 on
// the 1x8c2 NEON MLAL DUP microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,a_offset)1350   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, a_offset) {
1351     TEST_REQUIRES_ARM_NEON;
1352     for (size_t k = 1; k <= 80; k += 17) {
1353       GemmMicrokernelTester()
1354         .mr(1)
1355         .nr(8)
1356         .kr(2)
1357         .sr(1)
1358         .m(1)
1359         .n(8)
1360         .k(k)
1361         .ks(3)
1362         .a_offset(83)
1363         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1364     }
1365   }
1366 
// Same configuration as the a_offset test (full 1x8 tile, ks(3),
// a_offset(83), k = 1, 18, 35, 52, 69), additionally setting zero_index(mz);
// the mz loop covers only mz = 0 here.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,zero)1367   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, zero) {
1368     TEST_REQUIRES_ARM_NEON;
1369     for (size_t k = 1; k <= 80; k += 17) {
1370       for (uint32_t mz = 0; mz < 1; mz++) {
1371         GemmMicrokernelTester()
1372           .mr(1)
1373           .nr(8)
1374           .kr(2)
1375           .sr(1)
1376           .m(1)
1377           .n(8)
1378           .k(k)
1379           .ks(3)
1380           .a_offset(83)
1381           .zero_index(mz)
1382           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1383       }
1384     }
1385   }
1386 
// Full 1x8 tile with k = 16 and qmin(128) on the 1x8c2 NEON MLAL DUP
// microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,qmin)1387   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmin) {
1388     TEST_REQUIRES_ARM_NEON;
1389     GemmMicrokernelTester()
1390       .mr(1)
1391       .nr(8)
1392       .kr(2)
1393       .sr(1)
1394       .m(1)
1395       .n(8)
1396       .k(16)
1397       .qmin(128)
1398       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1399   }
1400 
// Full 1x8 tile with k = 16 and qmax(128) on the 1x8c2 NEON MLAL DUP
// microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,qmax)1401   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmax) {
1402     TEST_REQUIRES_ARM_NEON;
1403     GemmMicrokernelTester()
1404       .mr(1)
1405       .nr(8)
1406       .kr(2)
1407       .sr(1)
1408       .m(1)
1409       .n(8)
1410       .k(16)
1411       .qmax(128)
1412       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1413   }
1414 
// Full 1x8 tile with k = 16 and cm_stride(11) on the 1x8c2 NEON MLAL DUP
// microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP,strided_cm)1415   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm) {
1416     TEST_REQUIRES_ARM_NEON;
1417     GemmMicrokernelTester()
1418       .mr(1)
1419       .nr(8)
1420       .kr(2)
1421       .sr(1)
1422       .m(1)
1423       .n(8)
1424       .k(16)
1425       .cm_stride(11)
1426       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1427   }
1428 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1429 
1430 
1431 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case for the 1x8c2 NEON MLAL LD2R QS8 IGEMM microkernel: full
// 1x8 tile with k = 16 and all other tester settings left at their defaults.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_eq_16)1432   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
1433     TEST_REQUIRES_ARM_NEON;
1434     GemmMicrokernelTester()
1435       .mr(1)
1436       .nr(8)
1437       .kr(2)
1438       .sr(1)
1439       .m(1)
1440       .n(8)
1441       .k(16)
1442       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1443   }
1444 
// Full 1x8 tile with k = 16 and cn_stride(11) on the 1x8c2 NEON MLAL LD2R
// QS8 IGEMM microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,strided_cn)1445   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cn) {
1446     TEST_REQUIRES_ARM_NEON;
1447     GemmMicrokernelTester()
1448       .mr(1)
1449       .nr(8)
1450       .kr(2)
1451       .sr(1)
1452       .m(1)
1453       .n(8)
1454       .k(16)
1455       .cn_stride(11)
1456       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1457   }
1458 
// Sweeps every sub-tile (m = 1, n = 1..8) at k = 16 on the 1x8c2 NEON MLAL
// LD2R microkernel, one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_eq_16_subtile)1459   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
1460     TEST_REQUIRES_ARM_NEON;
1461     for (uint32_t n = 1; n <= 8; n++) {
1462       for (uint32_t m = 1; m <= 1; m++) {
1463         GemmMicrokernelTester()
1464           .mr(1)
1465           .nr(8)
1466           .kr(2)
1467           .sr(1)
1468           .m(m)
1469           .n(n)
1470           .k(16)
1471           .iterations(1)
1472           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1473       }
1474     }
1475   }
1476 
// Sweeps m over 1..mr (only m = 1, since mr is 1) with n = 8 and k = 16 on
// the 1x8c2 NEON MLAL LD2R microkernel, one tester iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_eq_16_subtile_m)1477   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
1478     TEST_REQUIRES_ARM_NEON;
1479     for (uint32_t m = 1; m <= 1; m++) {
1480       GemmMicrokernelTester()
1481         .mr(1)
1482         .nr(8)
1483         .kr(2)
1484         .sr(1)
1485         .m(m)
1486         .n(8)
1487         .k(16)
1488         .iterations(1)
1489         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1490     }
1491   }
1492 
// Sweeps n = 1..8 with m = 1 and k = 16 on the 1x8c2 NEON MLAL LD2R
// microkernel, one tester iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_eq_16_subtile_n)1493   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
1494     TEST_REQUIRES_ARM_NEON;
1495     for (uint32_t n = 1; n <= 8; n++) {
1496       GemmMicrokernelTester()
1497         .mr(1)
1498         .nr(8)
1499         .kr(2)
1500         .sr(1)
1501         .m(1)
1502         .n(n)
1503         .k(16)
1504         .iterations(1)
1505         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1506     }
1507   }
1508 
// Full 1x8 tile for every k in 1..15 (all values below 16) on the 1x8c2
// NEON MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_lt_16)1509   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
1510     TEST_REQUIRES_ARM_NEON;
1511     for (size_t k = 1; k < 16; k++) {
1512       GemmMicrokernelTester()
1513         .mr(1)
1514         .nr(8)
1515         .kr(2)
1516         .sr(1)
1517         .m(1)
1518         .n(8)
1519         .k(k)
1520         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1521     }
1522   }
1523 
// Every sub-tile (m = 1, n = 1..8) for every k in 1..15 on the 1x8c2 NEON
// MLAL LD2R microkernel, one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_lt_16_subtile)1524   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
1525     TEST_REQUIRES_ARM_NEON;
1526     for (size_t k = 1; k < 16; k++) {
1527       for (uint32_t n = 1; n <= 8; n++) {
1528         for (uint32_t m = 1; m <= 1; m++) {
1529           GemmMicrokernelTester()
1530             .mr(1)
1531             .nr(8)
1532             .kr(2)
1533             .sr(1)
1534             .m(m)
1535             .n(n)
1536             .k(k)
1537             .iterations(1)
1538             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1539         }
1540       }
1541     }
1542   }
1543 
// Full 1x8 tile for every k in 17..31 (strictly between 16 and 32) on the
// 1x8c2 NEON MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_gt_16)1544   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
1545     TEST_REQUIRES_ARM_NEON;
1546     for (size_t k = 17; k < 32; k++) {
1547       GemmMicrokernelTester()
1548         .mr(1)
1549         .nr(8)
1550         .kr(2)
1551         .sr(1)
1552         .m(1)
1553         .n(8)
1554         .k(k)
1555         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1556     }
1557   }
1558 
// Every sub-tile (m = 1, n = 1..8) for every k in 17..31 on the 1x8c2 NEON
// MLAL LD2R microkernel, one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_gt_16_subtile)1559   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
1560     TEST_REQUIRES_ARM_NEON;
1561     for (size_t k = 17; k < 32; k++) {
1562       for (uint32_t n = 1; n <= 8; n++) {
1563         for (uint32_t m = 1; m <= 1; m++) {
1564           GemmMicrokernelTester()
1565             .mr(1)
1566             .nr(8)
1567             .kr(2)
1568             .sr(1)
1569             .m(m)
1570             .n(n)
1571             .k(k)
1572             .iterations(1)
1573             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1574         }
1575       }
1576     }
1577   }
1578 
// Full 1x8 tile for k = 32, 48, ..., 160 (multiples of 16) on the 1x8c2
// NEON MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_div_16)1579   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16) {
1580     TEST_REQUIRES_ARM_NEON;
1581     for (size_t k = 32; k <= 160; k += 16) {
1582       GemmMicrokernelTester()
1583         .mr(1)
1584         .nr(8)
1585         .kr(2)
1586         .sr(1)
1587         .m(1)
1588         .n(8)
1589         .k(k)
1590         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1591     }
1592   }
1593 
// Every sub-tile (m = 1, n = 1..8) for k = 32, 48, ..., 160 on the 1x8c2
// NEON MLAL LD2R microkernel, one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,k_div_16_subtile)1594   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
1595     TEST_REQUIRES_ARM_NEON;
1596     for (size_t k = 32; k <= 160; k += 16) {
1597       for (uint32_t n = 1; n <= 8; n++) {
1598         for (uint32_t m = 1; m <= 1; m++) {
1599           GemmMicrokernelTester()
1600             .mr(1)
1601             .nr(8)
1602             .kr(2)
1603             .sr(1)
1604             .m(m)
1605             .n(n)
1606             .k(k)
1607             .iterations(1)
1608             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1609         }
1610       }
1611     }
1612   }
1613 
// n = 9..15 (larger than nr = 8, smaller than 2 * nr) with m = 1 and
// k = 1, 18, 35, 52, 69 on the 1x8c2 NEON MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,n_gt_8)1614   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
1615     TEST_REQUIRES_ARM_NEON;
1616     for (uint32_t n = 9; n < 16; n++) {
1617       for (size_t k = 1; k <= 80; k += 17) {
1618         GemmMicrokernelTester()
1619           .mr(1)
1620           .nr(8)
1621           .kr(2)
1622           .sr(1)
1623           .m(1)
1624           .n(n)
1625           .k(k)
1626           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1627       }
1628     }
1629   }
1630 
// n = 9..15 with m = 1, k = 1, 18, 35, 52, 69 and cn_stride(11) on the
// 1x8c2 NEON MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,n_gt_8_strided_cn)1631   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
1632     TEST_REQUIRES_ARM_NEON;
1633     for (uint32_t n = 9; n < 16; n++) {
1634       for (size_t k = 1; k <= 80; k += 17) {
1635         GemmMicrokernelTester()
1636           .mr(1)
1637           .nr(8)
1638           .kr(2)
1639           .sr(1)
1640           .m(1)
1641           .n(n)
1642           .k(k)
1643           .cn_stride(11)
1644           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1645       }
1646     }
1647   }
1648 
// n = 9..15 crossed with k = 1, 18, 35, 52, 69 and m = 1..mr (only m = 1)
// on the 1x8c2 NEON MLAL LD2R microkernel, one tester iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,n_gt_8_subtile)1649   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
1650     TEST_REQUIRES_ARM_NEON;
1651     for (uint32_t n = 9; n < 16; n++) {
1652       for (size_t k = 1; k <= 80; k += 17) {
1653         for (uint32_t m = 1; m <= 1; m++) {
1654           GemmMicrokernelTester()
1655             .mr(1)
1656             .nr(8)
1657             .kr(2)
1658             .sr(1)
1659             .m(m)
1660             .n(n)
1661             .k(k)
1662             .iterations(1)
1663             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1664         }
1665       }
1666     }
1667   }
1668 
// n = 16 and 24 (multiples of nr = 8) with m = 1 and k = 1, 18, 35, 52, 69
// on the 1x8c2 NEON MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,n_div_8)1669   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8) {
1670     TEST_REQUIRES_ARM_NEON;
1671     for (uint32_t n = 16; n <= 24; n += 8) {
1672       for (size_t k = 1; k <= 80; k += 17) {
1673         GemmMicrokernelTester()
1674           .mr(1)
1675           .nr(8)
1676           .kr(2)
1677           .sr(1)
1678           .m(1)
1679           .n(n)
1680           .k(k)
1681           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1682       }
1683     }
1684   }
1685 
// n = 16 and 24 with m = 1, k = 1, 18, 35, 52, 69 and cn_stride(11) on the
// 1x8c2 NEON MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,n_div_8_strided_cn)1686   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
1687     TEST_REQUIRES_ARM_NEON;
1688     for (uint32_t n = 16; n <= 24; n += 8) {
1689       for (size_t k = 1; k <= 80; k += 17) {
1690         GemmMicrokernelTester()
1691           .mr(1)
1692           .nr(8)
1693           .kr(2)
1694           .sr(1)
1695           .m(1)
1696           .n(n)
1697           .k(k)
1698           .cn_stride(11)
1699           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1700       }
1701     }
1702   }
1703 
// n = 16 and 24 crossed with k = 1, 18, 35, 52, 69 and m = 1..mr (only
// m = 1) on the 1x8c2 NEON MLAL LD2R microkernel, one tester iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,n_div_8_subtile)1704   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
1705     TEST_REQUIRES_ARM_NEON;
1706     for (uint32_t n = 16; n <= 24; n += 8) {
1707       for (size_t k = 1; k <= 80; k += 17) {
1708         for (uint32_t m = 1; m <= 1; m++) {
1709           GemmMicrokernelTester()
1710             .mr(1)
1711             .nr(8)
1712             .kr(2)
1713             .sr(1)
1714             .m(m)
1715             .n(n)
1716             .k(k)
1717             .iterations(1)
1718             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1719         }
1720       }
1721     }
1722   }
1723 
// Full 1x8 tile with ks(3) for k = 1, 18, 35, 52, 69 on the 1x8c2 NEON MLAL
// LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,small_kernel)1724   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel) {
1725     TEST_REQUIRES_ARM_NEON;
1726     for (size_t k = 1; k <= 80; k += 17) {
1727       GemmMicrokernelTester()
1728         .mr(1)
1729         .nr(8)
1730         .kr(2)
1731         .sr(1)
1732         .m(1)
1733         .n(8)
1734         .k(k)
1735         .ks(3)
1736         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1737     }
1738   }
1739 
// Every sub-tile (m = 1, n = 1..8) with ks(3) for k = 1, 18, 35, 52, 69 on
// the 1x8c2 NEON MLAL LD2R microkernel, one tester iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,small_kernel_subtile)1740   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
1741     TEST_REQUIRES_ARM_NEON;
1742     for (size_t k = 1; k <= 80; k += 17) {
1743       for (uint32_t n = 1; n <= 8; n++) {
1744         for (uint32_t m = 1; m <= 1; m++) {
1745           GemmMicrokernelTester()
1746             .mr(1)
1747             .nr(8)
1748             .kr(2)
1749             .sr(1)
1750             .m(m)
1751             .n(n)
1752             .k(k)
1753             .ks(3)
1754             .iterations(1)
1755             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1756         }
1757       }
1758     }
1759   }
1760 
// n = 9..15 with m = 1, ks(3) and k = 1, 18, 35, 52, 69 on the 1x8c2 NEON
// MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,n_gt_8_small_kernel)1761   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
1762     TEST_REQUIRES_ARM_NEON;
1763     for (uint32_t n = 9; n < 16; n++) {
1764       for (size_t k = 1; k <= 80; k += 17) {
1765         GemmMicrokernelTester()
1766           .mr(1)
1767           .nr(8)
1768           .kr(2)
1769           .sr(1)
1770           .m(1)
1771           .n(n)
1772           .k(k)
1773           .ks(3)
1774           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1775       }
1776     }
1777   }
1778 
// n = 16 and 24 with m = 1, ks(3) and k = 1, 18, 35, 52, 69 on the 1x8c2
// NEON MLAL LD2R microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R,n_div_8_small_kernel)1779   TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
1780     TEST_REQUIRES_ARM_NEON;
1781     for (uint32_t n = 16; n <= 24; n += 8) {
1782       for (size_t k = 1; k <= 80; k += 17) {
1783         GemmMicrokernelTester()
1784           .mr(1)
1785           .nr(8)
1786           .kr(2)
1787           .sr(1)
1788           .m(1)
1789           .n(n)
1790           .k(k)
1791           .ks(3)
1792           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
1793       }
1794     }
1795   }
1796 
// Runs the 1x8c2 NEON MLAL LD2R kernel with cm_stride(11) for
// k in {1, 18, 35, 52, 69}, every n in [1, 8] and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
1817 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD2R kernel with ks(3) and
// a_offset(83) for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1834 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD2R kernel with ks(3),
// a_offset(83) and zero_index(mz) for k in {1, 18, 35, 52, 69}.
// The mz loop bound is 1, so mz only takes the value 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
1854 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD2R kernel at k = 16 with
// qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
1868 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD2R kernel at k = 16 with
// qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
1882 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD2R kernel at k = 16 with
// cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
1896 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1897 
1898 
1899 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel at k = 16 with no
// further tester options set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
1912 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel at k = 16 with
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
1926 
// Runs the 1x8c2 NEON MLAL LD4R kernel at k = 16 for every n in [1, 8] and
// m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
1944 
// Runs the 1x8c2 NEON MLAL LD4R kernel at k = 16 and n = 8 while looping m
// over [1, 1] (a single value), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1960 
// Runs the 1x8c2 NEON MLAL LD4R kernel at k = 16 and m = 1 for every n in
// [1, 8], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1976 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel for every k in
// [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1991 
// Runs the 1x8c2 NEON MLAL LD4R kernel for every k in [1, 15], every n in
// [1, 8] and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2011 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel for every k in
// [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2026 
// Runs the 1x8c2 NEON MLAL LD4R kernel for every k in [17, 31], every n in
// [1, 8] and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2046 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel for k = 32, 48,
// ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2061 
// Runs the 1x8c2 NEON MLAL LD4R kernel for k = 32, 48, ..., 160, every n in
// [1, 8] and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2081 
// Runs the 1x8c2 NEON MLAL LD4R kernel for n in [9, 15] (above nr = 8) and
// k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
2098 
// Runs the 1x8c2 NEON MLAL LD4R kernel with cn_stride(11) for n in [9, 15]
// and k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
2116 
// Runs the 1x8c2 NEON MLAL LD4R kernel for n in [9, 15],
// k in {1, 18, 35, 52, 69} and m = 1, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2136 
// Runs the 1x8c2 NEON MLAL LD4R kernel for n in {16, 24} (multiples of
// nr = 8) and k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
2153 
// Runs the 1x8c2 NEON MLAL LD4R kernel with cn_stride(11) for n in {16, 24}
// and k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
2171 
// Runs the 1x8c2 NEON MLAL LD4R kernel for n in {16, 24},
// k in {1, 18, 35, 52, 69} and m = 1, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2191 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel with ks(3) for
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2207 
// Runs the 1x8c2 NEON MLAL LD4R kernel with ks(3) for k in {1, 18, 35, 52, 69},
// every n in [1, 8] and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2228 
// Runs the 1x8c2 NEON MLAL LD4R kernel with ks(3) for n in [9, 15] (above
// nr = 8) and k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
2246 
// Runs the 1x8c2 NEON MLAL LD4R kernel with ks(3) for n in {16, 24}
// (multiples of nr = 8) and k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
2264 
// Runs the 1x8c2 NEON MLAL LD4R kernel with cm_stride(11) for
// k in {1, 18, 35, 52, 69}, every n in [1, 8] and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2285 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel with ks(3) and
// a_offset(83) for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
2302 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel with ks(3),
// a_offset(83) and zero_index(mz) for k in {1, 18, 35, 52, 69}.
// The mz loop bound is 1, so mz only takes the value 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
2322 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel at k = 16 with
// qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
2336 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel at k = 16 with
// qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
2350 
// Runs the full 1x8 tile of the 1x8c2 NEON MLAL LD4R kernel at k = 16 with
// cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
2364 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2365 
2366 
2367 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 1x8 tile of the 1x8c2 NEONv8 MLAL LD2R kernel at k = 16 with
// no further tester options set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2380 
// Runs the full 1x8 tile of the 1x8c2 NEONv8 MLAL LD2R kernel at k = 16 with
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2394 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel at k = 16 for every n in [1, 8] and
// m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2412 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel at k = 16 and n = 8 while looping m
// over [1, 1] (a single value), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2428 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel at k = 16 and m = 1 for every n in
// [1, 8], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2444 
// Runs the full 1x8 tile of the 1x8c2 NEONv8 MLAL LD2R kernel for every k in
// [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2459 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel for every k in [1, 15], every n in
// [1, 8] and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2479 
// Runs the full 1x8 tile of the 1x8c2 NEONv8 MLAL LD2R kernel for every k in
// [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2494 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel for every k in [17, 31], every n in
// [1, 8] and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2514 
// Runs the full 1x8 tile of the 1x8c2 NEONv8 MLAL LD2R kernel for k = 32, 48,
// ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2529 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel for k = 32, 48, ..., 160, every n in
// [1, 8] and m = 1, using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2549 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel for n in [9, 15] (above nr = 8) and
// k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2566 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel with cn_stride(11) for n in [9, 15]
// and k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2584 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel for n in [9, 15],
// k in {1, 18, 35, 52, 69} and m = 1, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2604 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel for n in {16, 24} (multiples of
// nr = 8) and k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2621 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel with cn_stride(11) for n in {16, 24}
// and k in {1, 18, 35, 52, 69}, with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2639 
// Runs the 1x8c2 NEONv8 MLAL LD2R kernel for n in {16, 24},
// k in {1, 18, 35, 52, 69} and m = 1, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2659 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile with ks set to 3, for k from 1 to 80 in steps of 17.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2675 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks set to 3; sweeps k (1..80 step 17), n (1..8) and m (1..1), one iteration per case.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2696 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks set to 3; n runs over 9..15 and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2714 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks set to 3; n runs over {16, 24} and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2732 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cm_stride set to 11; sweeps k (1..80 step 17), n (1..8) and m (1..1), one iteration per case.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2753 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile with ks set to 3 and a_offset set to 83, for k from 1 to 80 in steps of 17.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2770 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks set to 3 and a_offset set to 83; zero_index takes each value in [0, 1) for k = 1..80 step 17.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2790 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run on the full 1x8 tile at k = 16 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2804 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run on the full 1x8 tile at k = 16 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2818 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run on the full 1x8 tile at k = 16 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2832 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2833 
2834 
2835 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run on the full 1x8 tile at k = 16.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2848 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run on the full 1x8 tile at k = 16 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2862 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k fixed at 16; sweeps n over 1..8 and m over 1..1, one iteration per case.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2880 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k fixed at 16 and n at 8; sweeps m over 1..1, one iteration per case.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2896 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k fixed at 16 and m at 1; sweeps n over 1..8, one iteration per case.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2912 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile for every k in 1..15.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2927 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps k over 1..15, n over 1..8 and m over 1..1, one iteration per case.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2947 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile for every k in 17..31.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2962 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps k over 17..31, n over 1..8 and m over 1..1, one iteration per case.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2982 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile for k from 32 to 160 in steps of 16.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2997 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps k (32..160 step 16), n (1..8) and m (1..1), one iteration per case.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3017 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps n over 9..15 and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3034 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cn_stride set to 11; n runs over 9..15 and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3052 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps n (9..15), k (1..80 step 17) and m (1..1), one iteration per case.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3072 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps n over {16, 24} and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3089 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cn_stride set to 11; n runs over {16, 24} and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3107 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweeps n ({16, 24}), k (1..80 step 17) and m (1..1), one iteration per case.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3127 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile with ks set to 3, for k from 1 to 80 in steps of 17.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3143 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks set to 3; sweeps k (1..80 step 17), n (1..8) and m (1..1), one iteration per case.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3164 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks set to 3; n runs over 9..15 and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3182 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks set to 3; n runs over {16, 24} and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3200 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cm_stride set to 11; sweeps k (1..80 step 17), n (1..8) and m (1..1), one iteration per case.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3221 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile with ks set to 3 and a_offset set to 83, for k from 1 to 80 in steps of 17.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3238 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks set to 3 and a_offset set to 83; zero_index takes each value in [0, 1) for k = 1..80 step 17.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3258 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run on the full 1x8 tile at k = 16 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3272 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run on the full 1x8 tile at k = 16 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3286 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run on the full 1x8 tile at k = 16 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3300 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3301 
3302 
3303 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on the full 1x8 tile at k = 16.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3316 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on the full 1x8 tile at k = 16 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3330 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k fixed at 16; sweeps n over 1..8 and m over 1..1, one iteration per case.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3348 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k fixed at 16 and n at 8; sweeps m over 1..1, one iteration per case.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3364 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k fixed at 16 and m at 1; sweeps n over 1..8, one iteration per case.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3380 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile for every k in 1..15.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3395 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over 1..15, n over 1..8 and m over 1..1, one iteration per case.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3415 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile for every k in 17..31.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3430 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k over 17..31, n over 1..8 and m over 1..1, one iteration per case.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3450 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile for k from 32 to 160 in steps of 16.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3465 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps k (32..160 step 16), n (1..8) and m (1..1), one iteration per case.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3485 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps n over 9..15 and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3502 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // cn_stride set to 11; n runs over 9..15 and k from 1 to 80 in steps of 17, with m fixed at 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3520 
// Runs n in [9, 15] for each k in {1, 18, 35, 52, 69}. The m loop visits
// only m = 1 because its bound equals mr. Each configuration uses
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3540 
// Runs n in {16, 24} (multiples of nr = 8) with m = 1, for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3557 
// Runs n in {16, 24} with m = 1 and cn_stride(11), for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3575 
// Runs n in {16, 24} for each k in {1, 18, 35, 52, 69}. The m loop visits
// only m = 1 because its bound equals mr. Each configuration uses
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3595 
// Runs a full m=1, n=8 tile with ks(3) for each k in {1, 18, 35, 52, 69}.
// NOTE(review): ks is presumably the kernel size, judging by the test name;
// confirm against GemmMicrokernelTester.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3611 
// With ks(3), runs every n in [1, 8] for each k in {1, 18, 35, 52, 69}.
// The m loop visits only m = 1 because its bound equals mr. Each
// configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3632 
// With ks(3), runs n in [9, 15] and m = 1 for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3650 
// With ks(3), runs n in {16, 24} and m = 1 for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3668 
// With cm_stride(11), runs every n in [1, 8] for each k in
// {1, 18, 35, 52, 69}. The m loop visits only m = 1 because its bound
// equals mr. Each configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3689 
// Runs a full m=1, n=8 tile with ks(3) and a_offset(83) for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
3706 
// Same configuration as the a_offset case (ks(3), a_offset(83)) plus
// zero_index(mz). The mz loop visits only mz = 0 because its bound is
// mz < 1. k sweeps {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
3726 
// Runs a single full m=1, n=8 tile at k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(1)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
3740 
// Runs a single full m=1, n=8 tile at k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(1)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
3754 
// Runs a single full m=1, n=8 tile at k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(1)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
3768 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3769 
3770 
3771 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs a single full m=1, n=8 tile (equal to mr x nr) at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3784 
// Runs a single full m=1, n=8 tile at k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3798 
// At k = 16, runs every n in [1, 8]. The m loop visits only m = 1 because
// its bound equals mr. Each configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3816 
// At k = 16 and n = 8, iterates m up to mr. With mr = 1 the loop visits
// only m = 1. Uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3832 
// At k = 16 and m = 1, runs every n in [1, 8] with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3848 
// Runs a full m=1, n=8 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3863 
// For every k in [1, 15], runs every n in [1, 8]. The m loop visits only
// m = 1 because its bound equals mr. Each configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3883 
// Runs a full m=1, n=8 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3898 
// For every k in [17, 31], runs every n in [1, 8]. The m loop visits only
// m = 1 because its bound equals mr. Each configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3918 
// Runs a full m=1, n=8 tile for each k in {32, 48, ..., 160}, i.e.
// multiples of 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3933 
// For each k in {32, 48, ..., 160}, runs every n in [1, 8]. The m loop
// visits only m = 1 because its bound equals mr. Each configuration uses
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3953 
// Runs n in [9, 15] (more than nr = 8 but less than 2 * nr) with m = 1,
// for each k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3970 
// Runs n in [9, 15] with m = 1 and cn_stride(11), for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3988 
// Runs n in [9, 15] for each k in {1, 18, 35, 52, 69}. The m loop visits
// only m = 1 because its bound equals mr. Each configuration uses
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4008 
// Runs n in {16, 24} (multiples of nr = 8) with m = 1, for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4025 
// Runs n in {16, 24} with m = 1 and cn_stride(11), for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4043 
// Runs n in {16, 24} for each k in {1, 18, 35, 52, 69}. The m loop visits
// only m = 1 because its bound equals mr. Each configuration uses
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4063 
// Runs a full m=1, n=8 tile with ks(3) for each k in {1, 18, 35, 52, 69}.
// NOTE(review): ks is presumably the kernel size, judging by the test name;
// confirm against GemmMicrokernelTester.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4079 
// With ks(3), runs every n in [1, 8] for each k in {1, 18, 35, 52, 69}.
// The m loop visits only m = 1 because its bound equals mr. Each
// configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4100 
// With ks(3), runs n in [9, 15] and m = 1 for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4118 
// With ks(3), runs n in {16, 24} and m = 1 for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4136 
// With cm_stride(11), runs every n in [1, 8] for each k in
// {1, 18, 35, 52, 69}. The m loop visits only m = 1 because its bound
// equals mr. Each configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4157 
// Runs a full m=1, n=8 tile with ks(3) and a_offset(83) for each k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
4174 
// Same configuration as the a_offset case (ks(3), a_offset(83)) plus
// zero_index(mz). The mz loop visits only mz = 0 because its bound is
// mz < 1. k sweeps {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4194 
// Runs a single full m=1, n=8 tile at k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(1)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4208 
// Runs a single full m=1, n=8 tile at k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(1)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4222 
// Runs a single full m=1, n=8 tile at k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(1)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
4236 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4237 
4238 
4239 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs a single full m=1, n=8 tile (equal to mr x nr) at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
4252 
// Runs a single full m=1, n=8 tile at k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
4266 
// At k = 16, runs every n in [1, 8]. The m loop visits only m = 1 because
// its bound equals mr. Each configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
4284 
// At k = 16 and n = 8, iterates m up to mr. With mr = 1 the loop visits
// only m = 1. Uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4300 
// At k = 16 and m = 1, runs every n in [1, 8] with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4316 
// Runs a full m=1, n=8 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4331 
// For every k in [1, 15], runs every n in [1, 8]. The m loop visits only
// m = 1 because its bound equals mr. Each configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4351 
// Runs a full m=1, n=8 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
4366 
// For every k in [17, 31], runs every n in [1, 8]. The m loop visits only
// m = 1 because its bound equals mr. Each configuration uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4386 
// Covers k = 32, 48, ..., 160 (multiples of 16) on the full tile (m = 1, n = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4401 
// Covers k = 32, 48, ..., 160 combined with every n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4421 
// Covers every n in [9, 16) with k = 1, 18, 35, 52, 69 (step 17 up to 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4438 
// Same sweep as n in [9, 16) x k in {1, 18, ..., 69}, but with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4456 
// Covers every n in [9, 16) with k stepping by 17 from 1 to 80, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4476 
// Covers n = 16 and n = 24 with k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4493 
// Covers n = 16 and n = 24 with k stepping by 17 from 1 to 80, using cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4511 
// Covers n = 16 and n = 24 with k stepping by 17 from 1 to 80, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4531 
// Full tile (m = 1, n = 8) with ks(3), k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4547 
// ks(3) runs for k stepping by 17 from 1 to 80 and every n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .ks(3)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4568 
// ks(3) runs for every n in [9, 16) with k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4586 
// ks(3) runs for n = 16 and n = 24 with k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4604 
// cm_stride(11) runs for k stepping by 17 from 1 to 80 and every n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .cm_stride(11)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4625 
// Full tile with ks(3) and a_offset(83), k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4642 
// Full tile with ks(3), a_offset(83) and zero_index(0), k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t zero_idx = 0;  // Indices below 1 are covered, i.e. only 0.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .zero_index(zero_idx)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4662 
// Single full-tile run at k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4676 
// Single full-tile run at k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4690 
// Single full-tile run at k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4704 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4705 
4706 
4707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full-tile run (m = 1, n = 8) at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4720 
// Single full-tile run at k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4734 
// Covers every n in [1, 8] at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_dim).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4752 
// Runs n = 8, k = 16 for each m up to mr; with mr(1) that is the single case m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(m_dim).n(8).k(16)
    .iterations(1)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4768 
// Covers every n in [1, 8] with m = 1 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4784 
// Covers every k in [1, 16) on the full tile (m = 1, n = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4799 
// Covers every k in [1, 16) combined with every n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4819 
// Covers every k in [17, 32) on the full tile (m = 1, n = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4834 
// Covers every k in [17, 32) combined with every n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4854 
// Covers k = 32, 48, ..., 160 (multiples of 16) on the full tile (m = 1, n = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4869 
// Covers k = 32, 48, ..., 160 combined with every n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4889 
// Covers every n in [9, 16) with k = 1, 18, 35, 52, 69 (step 17 up to 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4906 
// Covers every n in [9, 16) with k stepping by 17 from 1 to 80, using cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4924 
// Covers every n in [9, 16) with k stepping by 17 from 1 to 80, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4944 
// Covers n = 16 and n = 24 with k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4961 
// Covers n = 16 and n = 24 with k stepping by 17 from 1 to 80, using cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4979 
// Covers n = 16 and n = 24 with k stepping by 17 from 1 to 80, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4999 
// Full tile (m = 1, n = 8) with ks(3), k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5015 
// ks(3) runs for k stepping by 17 from 1 to 80 and every n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .ks(3)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5036 
// ks(3) runs for every n in [9, 16) with k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5054 
// ks(3) runs for n = 16 and n = 24 with k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5072 
// cm_stride(11) runs for k stepping by 17 from 1 to 80 and every n in [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .cm_stride(11)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5093 
// Full tile with ks(3) and a_offset(83), k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5110 
// Full tile with ks(3), a_offset(83) and zero_index(0), k stepping by 17 from 1 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  const uint32_t zero_idx = 0;  // Indices below 1 are covered, i.e. only 0.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .zero_index(zero_idx)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5130 
// Single full-tile run at k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5144 
// Single full-tile run at k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5158 
// Single full-tile run at k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5172 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5173 
5174 
5175 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full-tile run (m = 1, n = 8) at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5188 
// Single full-tile run at k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5202 
// Covers every n in [1, 8] at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  const uint32_t m_dim = 1;  // mr is 1, so 1 is the only m value covered.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_dim).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5220 
// Runs n = 8, k = 16 for each m up to mr; with mr(1) that is the single case m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  const uint32_t m_dim = 1;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(m_dim).n(8).k(16)
    .iterations(1)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5236 
// Covers every n in [1, 8] with m = 1 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5252 
// Runs m = 1, n = 8 for every k from 1 up to (not including) 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5267 
// For every k from 1 up to (not including) 16, runs every n in [1, 8] with
// every m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287 
// Runs m = 1, n = 8 for every k from 17 up to (not including) 32.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5302 
// For every k from 17 up to (not including) 32, runs every n in [1, 8] with
// every m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5322 
// Runs m = 1, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5337 
// For k = 32, 48, ..., 160 (step 16), runs every n in [1, 8] with every
// m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5357 
// With m = 1, runs every n from 9 up to (not including) 16 against
// k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5374 
// With m = 1 and cn_stride(11), runs every n from 9 up to (not including) 16
// against k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5392 
// Runs every n from 9 up to (not including) 16 against k = 1, 18, ...,
// (step 17, capped at 80) and every m in [1, 1], one tester iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5412 
// With m = 1, runs n = 16 and n = 24 against k = 1, 18, ...,
// (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5429 
// With m = 1 and cn_stride(11), runs n = 16 and n = 24 against
// k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5447 
// Runs n = 16 and n = 24 against k = 1, 18, ..., (step 17, capped at 80)
// and every m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5467 
// With m = 1, n = 8 and ks(3), runs k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5483 
// With ks(3), runs k = 1, 18, ..., (step 17, capped at 80) against every
// n in [1, 8] and every m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5504 
// With m = 1 and ks(3), runs every n from 9 up to (not including) 16
// against k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5522 
// With m = 1 and ks(3), runs n = 16 and n = 24 against
// k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5540 
// With cm_stride(11), runs k = 1, 18, ..., (step 17, capped at 80) against
// every n in [1, 8] and every m in [1, 1], one tester iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5561 
// With m = 1, n = 8, ks(3) and a_offset(83), runs
// k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5578 
// With m = 1, n = 8, ks(3) and a_offset(83), runs k = 1, 18, ...,
// (step 17, capped at 80) for every zero_index below 1 (i.e. only 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
          .m(1).n(8).k(k_size)      // problem size
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5598 
// One run at m = 1, n = 8, k = 16 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
      .m(1).n(8).k(16)          // problem size
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5612 
// One run at m = 1, n = 8, k = 16 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
      .m(1).n(8).k(16)          // problem size
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5626 
// One run at m = 1, n = 8, k = 16 with the tester's cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
      .m(1).n(8).k(16)          // problem size
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5640 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5641 
5642 
5643 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One run at m = 1, n = 8, k = 16 with no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
      .m(1).n(8).k(16)          // problem size
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5656 
// One run at m = 1, n = 8, k = 16 with the tester's cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
      .m(1).n(8).k(16)          // problem size
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5670 
// At k = 16, runs every n in [1, 8] combined with every m in [1, 1],
// requesting a single tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)     // kernel geometry
          .m(m_size).n(n_size).k(16)   // problem size
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5688 
// At k = 16 and n = 8, runs every m in [1, 1] with a single tester iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(m_size).n(8).k(16)     // problem size
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5704 
// At k = 16 and m = 1, runs every n in [1, 8] with a single tester iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(n_size).k(16)     // problem size
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5720 
// Runs m = 1, n = 8 for every k from 1 up to (not including) 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5735 
// For every k from 1 up to (not including) 16, runs every n in [1, 8] with
// every m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5755 
// Runs m = 1, n = 8 for every k from 17 up to (not including) 32.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5770 
// For every k from 17 up to (not including) 32, runs every n in [1, 8] with
// every m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5790 
// Runs m = 1, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5805 
// For k = 32, 48, ..., 160 (step 16), runs every n in [1, 8] with every
// m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5825 
// With m = 1, runs every n from 9 up to (not including) 16 against
// k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5842 
// With m = 1 and cn_stride(11), runs every n from 9 up to (not including) 16
// against k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5860 
// Runs every n from 9 up to (not including) 16 against k = 1, 18, ...,
// (step 17, capped at 80) and every m in [1, 1], one tester iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5880 
// With m = 1, runs n = 16 and n = 24 against k = 1, 18, ...,
// (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5897 
// With m = 1 and cn_stride(11), runs n = 16 and n = 24 against
// k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5915 
// Runs n = 16 and n = 24 against k = 1, 18, ..., (step 17, capped at 80)
// and every m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5935 
// With m = 1, n = 8 and ks(3), runs k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5951 
// With ks(3), runs k = 1, 18, ..., (step 17, capped at 80) against every
// n in [1, 8] and every m in [1, 1], one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5972 
// With m = 1 and ks(3), runs every n from 9 up to (not including) 16
// against k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5990 
// With m = 1 and ks(3), runs n = 16 and n = 24 against
// k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)    // kernel geometry
          .m(1).n(n_size).k(k_size)   // problem size
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6008 
// With cm_stride(11), runs k = 1, 18, ..., (step 17, capped at 80) against
// every n in [1, 8] and every m in [1, 1], one tester iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)         // kernel geometry
            .m(m_size).n(n_size).k(k_size)   // problem size
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6029 
// With m = 1, n = 8, ks(3) and a_offset(83), runs
// k = 1, 18, ..., (step 17, capped at 80).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
        .m(1).n(8).k(k_size)      // problem size
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
6046 
// With m = 1, n = 8, ks(3) and a_offset(83), runs k = 1, 18, ...,
// (step 17, capped at 80) for every zero_index below 1 (i.e. only 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
          .m(1).n(8).k(k_size)      // problem size
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6066 
// One run at m = 1, n = 8, k = 16 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
      .m(1).n(8).k(16)          // problem size
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
6080 
// One run at m = 1, n = 8, k = 16 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
      .m(1).n(8).k(16)          // problem size
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
6094 
// One run at m = 1, n = 8, k = 16 with the tester's cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)  // kernel geometry
      .m(1).n(8).k(16)          // problem size
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
6108 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6109 
6110 
6111 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single 1x8 tile at k = 16; no optional tester setting is overridden.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6124 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single 1x8 tile at k = 16 with cn_stride set to 11.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6138 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16 for every n in 1..8 and m in 1..1; one iteration per case.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(m_size).n(n_size).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6156 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16 and n = 8 while m is swept over 1..1; one iteration per case.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(m_size).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6172 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16 and m = 1 while n is swept over 1..8; one iteration per case.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(n_size).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6188 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile for every k in 1..15.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6203 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k in 1..15 crossed with n in 1..8 and m in 1..1; one iteration per case.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6223 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile for every k in 17..31.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6238 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k in 17..31 crossed with n in 1..8 and m in 1..1; one iteration per case.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6258 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile for k = 32, 48, ..., 160 (steps of 16).
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6273 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 crossed with n in 1..8 and m in 1..1; one iteration per case.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6293 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 (above nr = 8) crossed with k in {1, 18, 35, 52, 69}; m = 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6310 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 crossed with k in {1, 18, 35, 52, 69}; m = 1 and cn_stride = 11.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6328 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..1; one iteration per case.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6348 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} crossed with k in {1, 18, 35, 52, 69}; m = 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6365 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} crossed with k in {1, 18, 35, 52, 69}; m = 1 and cn_stride = 11.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6383 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..1; one iteration per case.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6403 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile with ks = 3 for k in {1, 18, 35, 52, 69}.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6419 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with k in {1, 18, 35, 52, 69}, n in 1..8, m in 1..1; one iteration per case.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6440 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with n in 9..15 crossed with k in {1, 18, 35, 52, 69}; m = 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6458 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with n in {16, 24} crossed with k in {1, 18, 35, 52, 69}; m = 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6476 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride = 11 with k in {1, 18, 35, 52, 69}, n in 1..8, m in 1..1; one iteration per case.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6497 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile with ks = 3 and a_offset = 83 for k in {1, 18, 35, 52, 69}.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6514 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile with ks = 3 and a_offset = 83 for k in {1, 18, 35, 52, 69};
  // zero_index is swept over 0 only (the loop bound is 1).
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(8).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6534 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single 1x8 tile at k = 16 with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6548 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single 1x8 tile at k = 16 with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6562 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single 1x8 tile at k = 16 with cm_stride set to 11.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6576 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6577 
6578 
6579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 1x8 tile at k = 16; no optional tester setting is overridden.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6592 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 1x8 tile at k = 16 with cn_stride set to 11.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6606 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 16 for every n in 1..8 and m in 1..1; one iteration per case.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(m_size).n(n_size).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6624 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 16 and n = 8 while m is swept over 1..1; one iteration per case.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(m_size).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6640 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 16 and m = 1 while n is swept over 1..8; one iteration per case.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(n_size).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6656 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile for every k in 1..15.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6671 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every k in 1..15 crossed with n in 1..8 and m in 1..1; one iteration per case.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6691 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile for every k in 17..31.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6706 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every k in 17..31 crossed with n in 1..8 and m in 1..1; one iteration per case.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6726 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile for k = 32, 48, ..., 160 (steps of 16).
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6741 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 32, 48, ..., 160 crossed with n in 1..8 and m in 1..1; one iteration per case.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6761 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 (above nr = 8) crossed with k in {1, 18, 35, 52, 69}; m = 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6778 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 crossed with k in {1, 18, 35, 52, 69}; m = 1 and cn_stride = 11.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6796 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..1; one iteration per case.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6816 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} crossed with k in {1, 18, 35, 52, 69}; m = 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6833 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} crossed with k in {1, 18, 35, 52, 69}; m = 1 and cn_stride = 11.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6851 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..1; one iteration per case.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6871 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile with ks = 3 for k in {1, 18, 35, 52, 69}.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6887 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks = 3 with k in {1, 18, 35, 52, 69}, n in 1..8, m in 1..1; one iteration per case.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6908 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks = 3 with n in 9..15 crossed with k in {1, 18, 35, 52, 69}; m = 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6926 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks = 3 with n in {16, 24} crossed with k in {1, 18, 35, 52, 69}; m = 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6944 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cm_stride = 11 with k in {1, 18, 35, 52, 69}, n in 1..8, m in 1..1; one iteration per case.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6965 
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 1x8 tile with ks = 3 and a_offset = 83 for k in {1, 18, 35, 52, 69}.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6982 
// Same setup as the a_offset case (ks(3), a_offset(83)), additionally passing
// each zero_index in [0, 1) for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7002 
// Single 1x8, k = 16 run with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
7016 
// Single 1x8, k = 16 run with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
7030 
// Single 1x8, k = 16 run with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
7044 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7045 
7046 
7047 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x8c2 NEON MLAL DUP kernel with m = mr, n = nr, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7060 
// Single 2x8, k = 16 run with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7074 
// With k fixed at 16, covers every (n, m) pair for n in [1, 8] and m in [1, 2],
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7092 
// With n = 8 and k = 16 fixed, varies m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7108 
// With m = 2 and k = 16 fixed, varies n over [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7124 
// Runs a 2x8 problem for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7139 
// For every k in [1, 15], covers n in [1, 8] and m in [1, 2], one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7159 
// Runs a 2x8 problem for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7174 
// For every k in [17, 31], covers n in [1, 8] and m in [1, 2], one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7194 
// Runs a 2x8 problem for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7209 
// For k = 32, 48, ..., 160, covers n in [1, 8] and m in [1, 2], one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7229 
// For every n in [9, 15], sweeps k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7246 
// For every n in [9, 15], sweeps k = 1, 18, 35, 52, 69 with m = 2 and
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7264 
// For every n in [9, 15] and k = 1, 18, 35, 52, 69, varies m over [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7284 
// For n = 16, 24, sweeps k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7301 
// For n = 16, 24, sweeps k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7319 
// For n = 16, 24 and k = 1, 18, 35, 52, 69, varies m over [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7339 
// Sweeps k = 1, 18, 35, 52, 69 on a 2x8 problem with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7355 
// For k = 1, 18, 35, 52, 69, covers n in [1, 8] and m in [1, 2] with ks(3),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7376 
// For every n in [9, 15], sweeps k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7394 
// For n = 16, 24, sweeps k = 1, 18, 35, 52, 69 with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7412 
// For k = 1, 18, 35, 52, 69, covers n in [1, 8] and m in [1, 2] with
// cm_stride(11), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7433 
// Sweeps k = 1, 18, 35, 52, 69 on a 2x8 problem with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7450 
// Same setup as the a_offset case (ks(3), a_offset(163)), additionally passing
// each zero_index in [0, 2) for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7470 
// Single 2x8, k = 16 run with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7484 
// Single 2x8, k = 16 run with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7498 
// Single 2x8, k = 16 run with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7512 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7513 
7514 
7515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x8c2 NEON MLAL LD1R kernel with m = mr, n = nr, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7528 
// Single 2x8, k = 16 run with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7542 
// With k fixed at 16, covers every (n, m) pair for n in [1, 8] and m in [1, 2],
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7560 
// With n = 8 and k = 16 fixed, varies m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7576 
// With m = 2 and k = 16 fixed, varies n over [1, 8], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(nc).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7592 
// Runs a 2x8 problem for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7607 
// For every k in [1, 15], covers n in [1, 8] and m in [1, 2], one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7627 
// Runs a 2x8 problem for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7642 
// For every k in [17, 31], covers n in [1, 8] and m in [1, 2], one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7662 
// Runs a 2x8 problem for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7677 
// For k = 32, 48, ..., 160, covers n in [1, 8] and m in [1, 2], one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7697 
// For every n in [9, 15], sweeps k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7714 
// For every n in [9, 15], sweeps k = 1, 18, 35, 52, 69 with m = 2 and
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7732 
// For every n in [9, 15] and k = 1, 18, 35, 52, 69, varies m over [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7752 
// For n = 16, 24, sweeps k = 1, 18, 35, 52, 69 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7769 
// For n = 16, 24, sweeps k = 1, 18, 35, 52, 69 with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7787 
// For n = 16, 24 and k = 1, 18, 35, 52, 69, varies m over [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7807 
// Sweeps k = 1, 18, 35, 52, 69 on a 2x8 problem with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7823 
// For k = 1, 18, 35, 52, 69, covers n in [1, 8] and m in [1, 2] with ks(3),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7844 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks(3) with m = 2; n runs over 9..15 and k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7862 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks(3) with m = 2; n is 16 then 24, k runs over 1..80 in steps of 17.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7880 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride(11) with every m in 1..2 and n in 1..8, for k = 1, 18, 35, 52, 69;
  // each combination runs with iterations(1).
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7901 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // m = 2, n = 8 with ks(3) and a_offset(163); k = 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7918 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Same setup as the a_offset case (ks(3), a_offset(163)), additionally
  // passing zero_index(0) and zero_index(1) for each k in 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7938 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: m = mr = 2, n = nr = 8, k = 16, with qmin(128) set on the tester.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7952 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: m = mr = 2, n = nr = 8, k = 16, with qmax(128) set on the tester.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7966 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: m = mr = 2, n = nr = 8, k = 16, with cm_stride(11).
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7980 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7981 
7982 
7983 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single case: m = mr = 2, n = nr = 8, k = 16, no extra tester options.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7996 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single case: m = mr = 2, n = nr = 8, k = 16, with cn_stride(11).
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8010 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k fixed at 16; every n in 1..8 paired with every m in 1..2,
  // each run with iterations(1).
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8028 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 16 and n = 8 fixed; only m varies (1..2), with iterations(1).
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8044 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 16 and m = 2 fixed; only n varies (1..8), with iterations(1).
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8060 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 2, n = 8; k takes every value from 1 through 15.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8075 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k from 1 through 15, crossed with n in 1..8 and m in 1..2;
  // each combination runs with iterations(1).
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8095 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 2, n = 8; k takes every value from 17 through 31.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8110 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k from 17 through 31, crossed with n in 1..8 and m in 1..2;
  // each combination runs with iterations(1).
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8130 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 2, n = 8; k steps through the multiples of 16 from 32 up to 160.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8145 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Multiples of 16 for k (32..160), crossed with n in 1..8 and m in 1..2;
  // each combination runs with iterations(1).
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8165 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 2; n runs over 9..15 and k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8182 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cn_stride(11) with m = 2; n runs over 9..15, k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8200 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n over 9..15, k over 1..80 in steps of 17, m over 1..2;
  // each combination runs with iterations(1).
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8220 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 2; n is 16 then 24, k runs over 1..80 in steps of 17.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8237 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cn_stride(11) with m = 2; n is 16 then 24, k over 1..80 in steps of 17.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8255 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n is 16 then 24, k over 1..80 in steps of 17, m over 1..2;
  // each combination runs with iterations(1).
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8275 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 2, n = 8 with ks(3); k takes the values 1, 18, 35, 52 and 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8291 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks(3) with every m in 1..2 and n in 1..8, for k = 1, 18, 35, 52, 69;
  // each combination runs with iterations(1).
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8312 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks(3) with m = 2; n runs over 9..15 and k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8330 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks(3) with m = 2; n is 16 then 24, k runs over 1..80 in steps of 17.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8348 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cm_stride(11) with every m in 1..2 and n in 1..8, for k = 1, 18, 35, 52, 69;
  // each combination runs with iterations(1).
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8369 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 2, n = 8 with ks(3) and a_offset(163); k = 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8386 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Same setup as the a_offset case (ks(3), a_offset(163)), additionally
  // passing zero_index(0) and zero_index(1) for each k in 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8406 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single case: m = mr = 2, n = nr = 8, k = 16, with qmin(128) set on the tester.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8420 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single case: m = mr = 2, n = nr = 8, k = 16, with qmax(128) set on the tester.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8434 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single case: m = mr = 2, n = nr = 8, k = 16, with cm_stride(11).
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8448 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8449 
8450 
8451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: m = mr = 2, n = nr = 8, k = 16, no extra tester options.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(4)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8464 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: m = mr = 2, n = nr = 8, k = 16, with cn_stride(11).
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(4)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8478 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k fixed at 16; every n in 1..8 paired with every m in 1..2,
  // each run with iterations(1).
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8496 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16 and n = 8 fixed; only m varies (1..2), with iterations(1).
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8512 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16 and m = 2 fixed; only n varies (1..8), with iterations(1).
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8528 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m = 2, n = 8; k takes every value from 1 through 15.
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8543 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 1 through 15, crossed with n in 1..8 and m in 1..2;
  // each combination runs with iterations(1).
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8563 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m = 2, n = 8; k takes every value from 17 through 31.
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8578 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 17 through 31, crossed with n in 1..8 and m in 1..2;
  // each combination runs with iterations(1).
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8598 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // m = 2, n = 8; k steps through the multiples of 16 from 32 up to 160.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8613 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Multiples of 16 for k (32..160), crossed with n in 1..8 and m in 1..2;
  // each combination runs with iterations(1).
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8633 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 2; n runs over 9..15 and k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8650 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // cn_stride(11) with m = 2; n runs over 9..15, k over 1..80 in steps of 17.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8668 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n over 9..15, k over 1..80 in steps of 17, m over 1..2;
  // each combination runs with iterations(1).
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8688 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 2; n is 16 then 24, k runs over 1..80 in steps of 17.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8705 
// n_div_8_strided_cn: m = 2, n = 16 and 24 (multiples of nr = 8), k = 1, 18, 35, 52, 69,
// with cn_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,n_div_8_strided_cn)8706   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
8707     TEST_REQUIRES_ARM_NEON;
8708     for (uint32_t n = 16; n <= 24; n += 8) {
8709       for (size_t k = 1; k <= 80; k += 17) {
8710         GemmMicrokernelTester()
8711           .mr(2)
8712           .nr(8)
8713           .kr(2)
8714           .sr(4)
8715           .m(2)
8716           .n(n)
8717           .k(k)
8718           .cn_stride(11)
8719           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8720       }
8721     }
8722   }
8723 
// n_div_8_subtile: n = 16 and 24 (multiples of nr = 8), k = 1, 18, 35, 52, 69, m swept over 1..2;
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,n_div_8_subtile)8724   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_subtile) {
8725     TEST_REQUIRES_ARM_NEON;
8726     for (uint32_t n = 16; n <= 24; n += 8) {
8727       for (size_t k = 1; k <= 80; k += 17) {
8728         for (uint32_t m = 1; m <= 2; m++) {
8729           GemmMicrokernelTester()
8730             .mr(2)
8731             .nr(8)
8732             .kr(2)
8733             .sr(4)
8734             .m(m)
8735             .n(n)
8736             .k(k)
8737             .iterations(1)
8738             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8739         }
8740       }
8741     }
8742   }
8743 
// small_kernel: full tile (m = 2, n = 8), k = 1, 18, 35, 52, 69, with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,small_kernel)8744   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, small_kernel) {
8745     TEST_REQUIRES_ARM_NEON;
8746     for (size_t k = 1; k <= 80; k += 17) {
8747       GemmMicrokernelTester()
8748         .mr(2)
8749         .nr(8)
8750         .kr(2)
8751         .sr(4)
8752         .m(2)
8753         .n(8)
8754         .k(k)
8755         .ks(3)
8756         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8757     }
8758   }
8759 
// small_kernel_subtile: k = 1, 18, 35, 52, 69, n swept over 1..8, m swept over 1..2,
// with ks(3) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,small_kernel_subtile)8760   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, small_kernel_subtile) {
8761     TEST_REQUIRES_ARM_NEON;
8762     for (size_t k = 1; k <= 80; k += 17) {
8763       for (uint32_t n = 1; n <= 8; n++) {
8764         for (uint32_t m = 1; m <= 2; m++) {
8765           GemmMicrokernelTester()
8766             .mr(2)
8767             .nr(8)
8768             .kr(2)
8769             .sr(4)
8770             .m(m)
8771             .n(n)
8772             .k(k)
8773             .ks(3)
8774             .iterations(1)
8775             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8776         }
8777       }
8778     }
8779   }
8780 
// n_gt_8_small_kernel: m = 2, n swept over 9..15 (above nr = 8), k = 1, 18, 35, 52, 69,
// with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,n_gt_8_small_kernel)8781   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
8782     TEST_REQUIRES_ARM_NEON;
8783     for (uint32_t n = 9; n < 16; n++) {
8784       for (size_t k = 1; k <= 80; k += 17) {
8785         GemmMicrokernelTester()
8786           .mr(2)
8787           .nr(8)
8788           .kr(2)
8789           .sr(4)
8790           .m(2)
8791           .n(n)
8792           .k(k)
8793           .ks(3)
8794           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8795       }
8796     }
8797   }
8798 
// n_div_8_small_kernel: m = 2, n = 16 and 24 (multiples of nr = 8), k = 1, 18, 35, 52, 69,
// with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,n_div_8_small_kernel)8799   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
8800     TEST_REQUIRES_ARM_NEON;
8801     for (uint32_t n = 16; n <= 24; n += 8) {
8802       for (size_t k = 1; k <= 80; k += 17) {
8803         GemmMicrokernelTester()
8804           .mr(2)
8805           .nr(8)
8806           .kr(2)
8807           .sr(4)
8808           .m(2)
8809           .n(n)
8810           .k(k)
8811           .ks(3)
8812           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8813       }
8814     }
8815   }
8816 
// strided_cm_subtile: k = 1, 18, 35, 52, 69, n swept over 1..8, m swept over 1..2,
// with cm_stride(11) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,strided_cm_subtile)8817   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm_subtile) {
8818     TEST_REQUIRES_ARM_NEON;
8819     for (size_t k = 1; k <= 80; k += 17) {
8820       for (uint32_t n = 1; n <= 8; n++) {
8821         for (uint32_t m = 1; m <= 2; m++) {
8822           GemmMicrokernelTester()
8823             .mr(2)
8824             .nr(8)
8825             .kr(2)
8826             .sr(4)
8827             .m(m)
8828             .n(n)
8829             .k(k)
8830             .cm_stride(11)
8831             .iterations(1)
8832             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8833         }
8834       }
8835     }
8836   }
8837 
// a_offset: full tile (m = 2, n = 8), k = 1, 18, 35, 52, 69, with ks(3) and a_offset(163)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,a_offset)8838   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, a_offset) {
8839     TEST_REQUIRES_ARM_NEON;
8840     for (size_t k = 1; k <= 80; k += 17) {
8841       GemmMicrokernelTester()
8842         .mr(2)
8843         .nr(8)
8844         .kr(2)
8845         .sr(4)
8846         .m(2)
8847         .n(8)
8848         .k(k)
8849         .ks(3)
8850         .a_offset(163)
8851         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8852     }
8853   }
8854 
// zero: full tile (m = 2, n = 8), k = 1, 18, 35, 52, 69, with ks(3) and a_offset(163);
// zero_index(mz) is swept over mz = 0..1, i.e. every value below mr = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,zero)8855   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, zero) {
8856     TEST_REQUIRES_ARM_NEON;
8857     for (size_t k = 1; k <= 80; k += 17) {
8858       for (uint32_t mz = 0; mz < 2; mz++) {
8859         GemmMicrokernelTester()
8860           .mr(2)
8861           .nr(8)
8862           .kr(2)
8863           .sr(4)
8864           .m(2)
8865           .n(8)
8866           .k(k)
8867           .ks(3)
8868           .a_offset(163)
8869           .zero_index(mz)
8870           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8871       }
8872     }
8873   }
8874 
// qmin: single full-tile run (m = 2, n = 8, k = 16) with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,qmin)8875   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmin) {
8876     TEST_REQUIRES_ARM_NEON;
8877     GemmMicrokernelTester()
8878       .mr(2)
8879       .nr(8)
8880       .kr(2)
8881       .sr(4)
8882       .m(2)
8883       .n(8)
8884       .k(16)
8885       .qmin(128)
8886       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8887   }
8888 
// qmax: single full-tile run (m = 2, n = 8, k = 16) with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,qmax)8889   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmax) {
8890     TEST_REQUIRES_ARM_NEON;
8891     GemmMicrokernelTester()
8892       .mr(2)
8893       .nr(8)
8894       .kr(2)
8895       .sr(4)
8896       .m(2)
8897       .n(8)
8898       .k(16)
8899       .qmax(128)
8900       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8901   }
8902 
// strided_cm: single full-tile run (m = 2, n = 8, k = 16) with cm_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL,strided_cm)8903   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm) {
8904     TEST_REQUIRES_ARM_NEON;
8905     GemmMicrokernelTester()
8906       .mr(2)
8907       .nr(8)
8908       .kr(2)
8909       .sr(4)
8910       .m(2)
8911       .n(8)
8912       .k(16)
8913       .cm_stride(11)
8914       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
8915   }
8916 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8917 
8918 
8919 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single full-tile run (m = 2, n = 8) with k = 16 and no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_eq_16)8920   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16) {
8921     TEST_REQUIRES_ARM_NEON_V8;
8922     GemmMicrokernelTester()
8923       .mr(2)
8924       .nr(8)
8925       .kr(2)
8926       .sr(4)
8927       .m(2)
8928       .n(8)
8929       .k(16)
8930       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
8931   }
8932 
// strided_cn: single full-tile run (m = 2, n = 8, k = 16) with cn_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,strided_cn)8933   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cn) {
8934     TEST_REQUIRES_ARM_NEON_V8;
8935     GemmMicrokernelTester()
8936       .mr(2)
8937       .nr(8)
8938       .kr(2)
8939       .sr(4)
8940       .m(2)
8941       .n(8)
8942       .k(16)
8943       .cn_stride(11)
8944       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
8945   }
8946 
// k_eq_16_subtile: k = 16, n swept over 1..8 and m swept over 1..2; each configuration
// is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_eq_16_subtile)8947   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
8948     TEST_REQUIRES_ARM_NEON_V8;
8949     for (uint32_t n = 1; n <= 8; n++) {
8950       for (uint32_t m = 1; m <= 2; m++) {
8951         GemmMicrokernelTester()
8952           .mr(2)
8953           .nr(8)
8954           .kr(2)
8955           .sr(4)
8956           .m(m)
8957           .n(n)
8958           .k(16)
8959           .iterations(1)
8960           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
8961       }
8962     }
8963   }
8964 
// k_eq_16_subtile_m: k = 16, n = 8, m swept over 1..2; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_eq_16_subtile_m)8965   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
8966     TEST_REQUIRES_ARM_NEON_V8;
8967     for (uint32_t m = 1; m <= 2; m++) {
8968       GemmMicrokernelTester()
8969         .mr(2)
8970         .nr(8)
8971         .kr(2)
8972         .sr(4)
8973         .m(m)
8974         .n(8)
8975         .k(16)
8976         .iterations(1)
8977         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
8978     }
8979   }
8980 
// k_eq_16_subtile_n: k = 16, m = 2, n swept over 1..8; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_eq_16_subtile_n)8981   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
8982     TEST_REQUIRES_ARM_NEON_V8;
8983     for (uint32_t n = 1; n <= 8; n++) {
8984       GemmMicrokernelTester()
8985         .mr(2)
8986         .nr(8)
8987         .kr(2)
8988         .sr(4)
8989         .m(2)
8990         .n(n)
8991         .k(16)
8992         .iterations(1)
8993         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
8994     }
8995   }
8996 
// k_lt_16: full tile (m = 2, n = 8) with k swept over every value 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_lt_16)8997   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16) {
8998     TEST_REQUIRES_ARM_NEON_V8;
8999     for (size_t k = 1; k < 16; k++) {
9000       GemmMicrokernelTester()
9001         .mr(2)
9002         .nr(8)
9003         .kr(2)
9004         .sr(4)
9005         .m(2)
9006         .n(8)
9007         .k(k)
9008         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9009     }
9010   }
9011 
// k_lt_16_subtile: k swept over 1..15, n over 1..8, m over 1..2; each configuration
// is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_lt_16_subtile)9012   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
9013     TEST_REQUIRES_ARM_NEON_V8;
9014     for (size_t k = 1; k < 16; k++) {
9015       for (uint32_t n = 1; n <= 8; n++) {
9016         for (uint32_t m = 1; m <= 2; m++) {
9017           GemmMicrokernelTester()
9018             .mr(2)
9019             .nr(8)
9020             .kr(2)
9021             .sr(4)
9022             .m(m)
9023             .n(n)
9024             .k(k)
9025             .iterations(1)
9026             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9027         }
9028       }
9029     }
9030   }
9031 
// k_gt_16: full tile (m = 2, n = 8) with k swept over every value 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_gt_16)9032   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16) {
9033     TEST_REQUIRES_ARM_NEON_V8;
9034     for (size_t k = 17; k < 32; k++) {
9035       GemmMicrokernelTester()
9036         .mr(2)
9037         .nr(8)
9038         .kr(2)
9039         .sr(4)
9040         .m(2)
9041         .n(8)
9042         .k(k)
9043         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9044     }
9045   }
9046 
// k_gt_16_subtile: k swept over 17..31, n over 1..8, m over 1..2; each configuration
// is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_gt_16_subtile)9047   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
9048     TEST_REQUIRES_ARM_NEON_V8;
9049     for (size_t k = 17; k < 32; k++) {
9050       for (uint32_t n = 1; n <= 8; n++) {
9051         for (uint32_t m = 1; m <= 2; m++) {
9052           GemmMicrokernelTester()
9053             .mr(2)
9054             .nr(8)
9055             .kr(2)
9056             .sr(4)
9057             .m(m)
9058             .n(n)
9059             .k(k)
9060             .iterations(1)
9061             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9062         }
9063       }
9064     }
9065   }
9066 
// k_div_16: full tile (m = 2, n = 8) with k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_div_16)9067   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16) {
9068     TEST_REQUIRES_ARM_NEON_V8;
9069     for (size_t k = 32; k <= 160; k += 16) {
9070       GemmMicrokernelTester()
9071         .mr(2)
9072         .nr(8)
9073         .kr(2)
9074         .sr(4)
9075         .m(2)
9076         .n(8)
9077         .k(k)
9078         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9079     }
9080   }
9081 
// k_div_16_subtile: k = 32, 48, ..., 160 (multiples of 16), n swept over 1..8, m over 1..2;
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,k_div_16_subtile)9082   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
9083     TEST_REQUIRES_ARM_NEON_V8;
9084     for (size_t k = 32; k <= 160; k += 16) {
9085       for (uint32_t n = 1; n <= 8; n++) {
9086         for (uint32_t m = 1; m <= 2; m++) {
9087           GemmMicrokernelTester()
9088             .mr(2)
9089             .nr(8)
9090             .kr(2)
9091             .sr(4)
9092             .m(m)
9093             .n(n)
9094             .k(k)
9095             .iterations(1)
9096             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9097         }
9098       }
9099     }
9100   }
9101 
// n_gt_8: m = 2, n swept over 9..15 (above nr = 8), k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,n_gt_8)9102   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8) {
9103     TEST_REQUIRES_ARM_NEON_V8;
9104     for (uint32_t n = 9; n < 16; n++) {
9105       for (size_t k = 1; k <= 80; k += 17) {
9106         GemmMicrokernelTester()
9107           .mr(2)
9108           .nr(8)
9109           .kr(2)
9110           .sr(4)
9111           .m(2)
9112           .n(n)
9113           .k(k)
9114           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9115       }
9116     }
9117   }
9118 
// n_gt_8_strided_cn: m = 2, n swept over 9..15 (above nr = 8), k = 1, 18, 35, 52, 69,
// with cn_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,n_gt_8_strided_cn)9119   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
9120     TEST_REQUIRES_ARM_NEON_V8;
9121     for (uint32_t n = 9; n < 16; n++) {
9122       for (size_t k = 1; k <= 80; k += 17) {
9123         GemmMicrokernelTester()
9124           .mr(2)
9125           .nr(8)
9126           .kr(2)
9127           .sr(4)
9128           .m(2)
9129           .n(n)
9130           .k(k)
9131           .cn_stride(11)
9132           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9133       }
9134     }
9135   }
9136 
// n_gt_8_subtile: n swept over 9..15 (above nr = 8), k = 1, 18, 35, 52, 69, m swept over 1..2;
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,n_gt_8_subtile)9137   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
9138     TEST_REQUIRES_ARM_NEON_V8;
9139     for (uint32_t n = 9; n < 16; n++) {
9140       for (size_t k = 1; k <= 80; k += 17) {
9141         for (uint32_t m = 1; m <= 2; m++) {
9142           GemmMicrokernelTester()
9143             .mr(2)
9144             .nr(8)
9145             .kr(2)
9146             .sr(4)
9147             .m(m)
9148             .n(n)
9149             .k(k)
9150             .iterations(1)
9151             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9152         }
9153       }
9154     }
9155   }
9156 
// n_div_8: m = 2, n = 16 and 24 (multiples of nr = 8), k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,n_div_8)9157   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8) {
9158     TEST_REQUIRES_ARM_NEON_V8;
9159     for (uint32_t n = 16; n <= 24; n += 8) {
9160       for (size_t k = 1; k <= 80; k += 17) {
9161         GemmMicrokernelTester()
9162           .mr(2)
9163           .nr(8)
9164           .kr(2)
9165           .sr(4)
9166           .m(2)
9167           .n(n)
9168           .k(k)
9169           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9170       }
9171     }
9172   }
9173 
// n_div_8_strided_cn: m = 2, n = 16 and 24 (multiples of nr = 8), k = 1, 18, 35, 52, 69,
// with cn_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,n_div_8_strided_cn)9174   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
9175     TEST_REQUIRES_ARM_NEON_V8;
9176     for (uint32_t n = 16; n <= 24; n += 8) {
9177       for (size_t k = 1; k <= 80; k += 17) {
9178         GemmMicrokernelTester()
9179           .mr(2)
9180           .nr(8)
9181           .kr(2)
9182           .sr(4)
9183           .m(2)
9184           .n(n)
9185           .k(k)
9186           .cn_stride(11)
9187           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9188       }
9189     }
9190   }
9191 
// n_div_8_subtile: n = 16 and 24 (multiples of nr = 8), k = 1, 18, 35, 52, 69, m swept over 1..2;
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,n_div_8_subtile)9192   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
9193     TEST_REQUIRES_ARM_NEON_V8;
9194     for (uint32_t n = 16; n <= 24; n += 8) {
9195       for (size_t k = 1; k <= 80; k += 17) {
9196         for (uint32_t m = 1; m <= 2; m++) {
9197           GemmMicrokernelTester()
9198             .mr(2)
9199             .nr(8)
9200             .kr(2)
9201             .sr(4)
9202             .m(m)
9203             .n(n)
9204             .k(k)
9205             .iterations(1)
9206             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9207         }
9208       }
9209     }
9210   }
9211 
// small_kernel: full tile (m = 2, n = 8), k = 1, 18, 35, 52, 69, with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,small_kernel)9212   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, small_kernel) {
9213     TEST_REQUIRES_ARM_NEON_V8;
9214     for (size_t k = 1; k <= 80; k += 17) {
9215       GemmMicrokernelTester()
9216         .mr(2)
9217         .nr(8)
9218         .kr(2)
9219         .sr(4)
9220         .m(2)
9221         .n(8)
9222         .k(k)
9223         .ks(3)
9224         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9225     }
9226   }
9227 
// small_kernel_subtile: k = 1, 18, 35, 52, 69, n swept over 1..8, m swept over 1..2,
// with ks(3) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,small_kernel_subtile)9228   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, small_kernel_subtile) {
9229     TEST_REQUIRES_ARM_NEON_V8;
9230     for (size_t k = 1; k <= 80; k += 17) {
9231       for (uint32_t n = 1; n <= 8; n++) {
9232         for (uint32_t m = 1; m <= 2; m++) {
9233           GemmMicrokernelTester()
9234             .mr(2)
9235             .nr(8)
9236             .kr(2)
9237             .sr(4)
9238             .m(m)
9239             .n(n)
9240             .k(k)
9241             .ks(3)
9242             .iterations(1)
9243             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9244         }
9245       }
9246     }
9247   }
9248 
// n_gt_8_small_kernel: m = 2, n swept over 9..15 (above nr = 8), k = 1, 18, 35, 52, 69,
// with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,n_gt_8_small_kernel)9249   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_small_kernel) {
9250     TEST_REQUIRES_ARM_NEON_V8;
9251     for (uint32_t n = 9; n < 16; n++) {
9252       for (size_t k = 1; k <= 80; k += 17) {
9253         GemmMicrokernelTester()
9254           .mr(2)
9255           .nr(8)
9256           .kr(2)
9257           .sr(4)
9258           .m(2)
9259           .n(n)
9260           .k(k)
9261           .ks(3)
9262           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9263       }
9264     }
9265   }
9266 
// n_div_8_small_kernel: m = 2, n = 16 and 24 (multiples of nr = 8), k = 1, 18, 35, 52, 69,
// with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,n_div_8_small_kernel)9267   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_small_kernel) {
9268     TEST_REQUIRES_ARM_NEON_V8;
9269     for (uint32_t n = 16; n <= 24; n += 8) {
9270       for (size_t k = 1; k <= 80; k += 17) {
9271         GemmMicrokernelTester()
9272           .mr(2)
9273           .nr(8)
9274           .kr(2)
9275           .sr(4)
9276           .m(2)
9277           .n(n)
9278           .k(k)
9279           .ks(3)
9280           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9281       }
9282     }
9283   }
9284 
// strided_cm_subtile: k = 1, 18, 35, 52, 69, n swept over 1..8, m swept over 1..2,
// with cm_stride(11) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,strided_cm_subtile)9285   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
9286     TEST_REQUIRES_ARM_NEON_V8;
9287     for (size_t k = 1; k <= 80; k += 17) {
9288       for (uint32_t n = 1; n <= 8; n++) {
9289         for (uint32_t m = 1; m <= 2; m++) {
9290           GemmMicrokernelTester()
9291             .mr(2)
9292             .nr(8)
9293             .kr(2)
9294             .sr(4)
9295             .m(m)
9296             .n(n)
9297             .k(k)
9298             .cm_stride(11)
9299             .iterations(1)
9300             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9301         }
9302       }
9303     }
9304   }
9305 
// a_offset: full tile (m = 2, n = 8), k = 1, 18, 35, 52, 69, with ks(3) and a_offset(163)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,a_offset)9306   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, a_offset) {
9307     TEST_REQUIRES_ARM_NEON_V8;
9308     for (size_t k = 1; k <= 80; k += 17) {
9309       GemmMicrokernelTester()
9310         .mr(2)
9311         .nr(8)
9312         .kr(2)
9313         .sr(4)
9314         .m(2)
9315         .n(8)
9316         .k(k)
9317         .ks(3)
9318         .a_offset(163)
9319         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9320     }
9321   }
9322 
// zero: full tile (m = 2, n = 8), k = 1, 18, 35, 52, 69, with ks(3) and a_offset(163);
// zero_index(mz) is swept over mz = 0..1, i.e. every value below mr = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,zero)9323   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, zero) {
9324     TEST_REQUIRES_ARM_NEON_V8;
9325     for (size_t k = 1; k <= 80; k += 17) {
9326       for (uint32_t mz = 0; mz < 2; mz++) {
9327         GemmMicrokernelTester()
9328           .mr(2)
9329           .nr(8)
9330           .kr(2)
9331           .sr(4)
9332           .m(2)
9333           .n(8)
9334           .k(k)
9335           .ks(3)
9336           .a_offset(163)
9337           .zero_index(mz)
9338           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9339       }
9340     }
9341   }
9342 
// qmin: single full-tile run (m = 2, n = 8, k = 16) with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,qmin)9343   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmin) {
9344     TEST_REQUIRES_ARM_NEON_V8;
9345     GemmMicrokernelTester()
9346       .mr(2)
9347       .nr(8)
9348       .kr(2)
9349       .sr(4)
9350       .m(2)
9351       .n(8)
9352       .k(16)
9353       .qmin(128)
9354       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9355   }
9356 
// qmax: single full-tile run (m = 2, n = 8, k = 16) with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,qmax)9357   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmax) {
9358     TEST_REQUIRES_ARM_NEON_V8;
9359     GemmMicrokernelTester()
9360       .mr(2)
9361       .nr(8)
9362       .kr(2)
9363       .sr(4)
9364       .m(2)
9365       .n(8)
9366       .k(16)
9367       .qmax(128)
9368       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9369   }
9370 
// strided_cm: single full-tile run (m = 2, n = 8, k = 16) with cm_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL,strided_cm)9371   TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm) {
9372     TEST_REQUIRES_ARM_NEON_V8;
9373     GemmMicrokernelTester()
9374       .mr(2)
9375       .nr(8)
9376       .kr(2)
9377       .sr(4)
9378       .m(2)
9379       .n(8)
9380       .k(16)
9381       .cm_stride(11)
9382       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9383   }
9384 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
9385 
9386 
9387 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single full-tile run (m = 2, n = 8) with k = 16 and no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_eq_16)9388   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16) {
9389     TEST_REQUIRES_ARM_NEON;
9390     GemmMicrokernelTester()
9391       .mr(2)
9392       .nr(8)
9393       .kr(4)
9394       .sr(1)
9395       .m(2)
9396       .n(8)
9397       .k(16)
9398       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9399   }
9400 
// strided_cn: single full-tile run (m = 2, n = 8, k = 16) with cn_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,strided_cn)9401   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cn) {
9402     TEST_REQUIRES_ARM_NEON;
9403     GemmMicrokernelTester()
9404       .mr(2)
9405       .nr(8)
9406       .kr(4)
9407       .sr(1)
9408       .m(2)
9409       .n(8)
9410       .k(16)
9411       .cn_stride(11)
9412       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9413   }
9414 
// k_eq_16_subtile: k = 16, n swept over 1..8 and m swept over 1..2; each configuration
// is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_eq_16_subtile)9415   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
9416     TEST_REQUIRES_ARM_NEON;
9417     for (uint32_t n = 1; n <= 8; n++) {
9418       for (uint32_t m = 1; m <= 2; m++) {
9419         GemmMicrokernelTester()
9420           .mr(2)
9421           .nr(8)
9422           .kr(4)
9423           .sr(1)
9424           .m(m)
9425           .n(n)
9426           .k(16)
9427           .iterations(1)
9428           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9429       }
9430     }
9431   }
9432 
// k_eq_16_subtile_m: k = 16, n = 8, m swept over 1..2; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_eq_16_subtile_m)9433   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
9434     TEST_REQUIRES_ARM_NEON;
9435     for (uint32_t m = 1; m <= 2; m++) {
9436       GemmMicrokernelTester()
9437         .mr(2)
9438         .nr(8)
9439         .kr(4)
9440         .sr(1)
9441         .m(m)
9442         .n(8)
9443         .k(16)
9444         .iterations(1)
9445         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9446     }
9447   }
9448 
// k_eq_16_subtile_n: k = 16, m = 2, n swept over 1..8; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_eq_16_subtile_n)9449   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
9450     TEST_REQUIRES_ARM_NEON;
9451     for (uint32_t n = 1; n <= 8; n++) {
9452       GemmMicrokernelTester()
9453         .mr(2)
9454         .nr(8)
9455         .kr(4)
9456         .sr(1)
9457         .m(2)
9458         .n(n)
9459         .k(16)
9460         .iterations(1)
9461         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9462     }
9463   }
9464 
// k_lt_16: full tile (m = 2, n = 8) with k swept over every value 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_lt_16)9465   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16) {
9466     TEST_REQUIRES_ARM_NEON;
9467     for (size_t k = 1; k < 16; k++) {
9468       GemmMicrokernelTester()
9469         .mr(2)
9470         .nr(8)
9471         .kr(4)
9472         .sr(1)
9473         .m(2)
9474         .n(8)
9475         .k(k)
9476         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9477     }
9478   }
9479 
// k_lt_16_subtile: k swept over 1..15, n over 1..8, m over 1..2; each configuration
// is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_lt_16_subtile)9480   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
9481     TEST_REQUIRES_ARM_NEON;
9482     for (size_t k = 1; k < 16; k++) {
9483       for (uint32_t n = 1; n <= 8; n++) {
9484         for (uint32_t m = 1; m <= 2; m++) {
9485           GemmMicrokernelTester()
9486             .mr(2)
9487             .nr(8)
9488             .kr(4)
9489             .sr(1)
9490             .m(m)
9491             .n(n)
9492             .k(k)
9493             .iterations(1)
9494             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9495         }
9496       }
9497     }
9498   }
9499 
// k_gt_16: full tile (m = 2, n = 8) with k swept over every value 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_gt_16)9500   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16) {
9501     TEST_REQUIRES_ARM_NEON;
9502     for (size_t k = 17; k < 32; k++) {
9503       GemmMicrokernelTester()
9504         .mr(2)
9505         .nr(8)
9506         .kr(4)
9507         .sr(1)
9508         .m(2)
9509         .n(8)
9510         .k(k)
9511         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9512     }
9513   }
9514 
// k_gt_16_subtile: k swept over 17..31, n over 1..8, m over 1..2; each configuration
// is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_gt_16_subtile)9515   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
9516     TEST_REQUIRES_ARM_NEON;
9517     for (size_t k = 17; k < 32; k++) {
9518       for (uint32_t n = 1; n <= 8; n++) {
9519         for (uint32_t m = 1; m <= 2; m++) {
9520           GemmMicrokernelTester()
9521             .mr(2)
9522             .nr(8)
9523             .kr(4)
9524             .sr(1)
9525             .m(m)
9526             .n(n)
9527             .k(k)
9528             .iterations(1)
9529             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9530         }
9531       }
9532     }
9533   }
9534 
// k_div_16: full tile (m = 2, n = 8) with k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_div_16)9535   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16) {
9536     TEST_REQUIRES_ARM_NEON;
9537     for (size_t k = 32; k <= 160; k += 16) {
9538       GemmMicrokernelTester()
9539         .mr(2)
9540         .nr(8)
9541         .kr(4)
9542         .sr(1)
9543         .m(2)
9544         .n(8)
9545         .k(k)
9546         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9547     }
9548   }
9549 
// k_div_16_subtile: k = 32, 48, ..., 160 (multiples of 16), n swept over 1..8, m over 1..2;
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,k_div_16_subtile)9550   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
9551     TEST_REQUIRES_ARM_NEON;
9552     for (size_t k = 32; k <= 160; k += 16) {
9553       for (uint32_t n = 1; n <= 8; n++) {
9554         for (uint32_t m = 1; m <= 2; m++) {
9555           GemmMicrokernelTester()
9556             .mr(2)
9557             .nr(8)
9558             .kr(4)
9559             .sr(1)
9560             .m(m)
9561             .n(n)
9562             .k(k)
9563             .iterations(1)
9564             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9565         }
9566       }
9567     }
9568   }
9569 
// n_gt_8: m = 2, n swept over 9..15 (above nr = 8), k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP,n_gt_8)9570   TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8) {
9571     TEST_REQUIRES_ARM_NEON;
9572     for (uint32_t n = 9; n < 16; n++) {
9573       for (size_t k = 1; k <= 80; k += 17) {
9574         GemmMicrokernelTester()
9575           .mr(2)
9576           .nr(8)
9577           .kr(4)
9578           .sr(1)
9579           .m(2)
9580           .n(n)
9581           .k(k)
9582           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9583       }
9584     }
9585   }
9586 
// neon_mlal_dup 2x8c4: n in 9..15, m=2, k in {1, 18, 35, 52, 69},
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9604 
// neon_mlal_dup 2x8c4: n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..2;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9624 
// neon_mlal_dup 2x8c4: n in {16, 24}, m=2, k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9641 
// neon_mlal_dup 2x8c4: n in {16, 24}, m=2, k in {1, 18, 35, 52, 69},
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9659 
// neon_mlal_dup 2x8c4: n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..2;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9679 
// neon_mlal_dup 2x8c4: m=2, n=8, k in {1, 18, 35, 52, 69}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_val)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9695 
// neon_mlal_dup 2x8c4: k in {1, 18, 35, 52, 69}, n in 1..8, m in 1..2,
// with ks set to 3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9716 
// neon_mlal_dup 2x8c4: n in 9..15, m=2, k in {1, 18, 35, 52, 69},
// with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9734 
// neon_mlal_dup 2x8c4: n in {16, 24}, m=2, k in {1, 18, 35, 52, 69},
// with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9752 
// neon_mlal_dup 2x8c4: k in {1, 18, 35, 52, 69}, n in 1..8, m in 1..2,
// with cm_stride set to 11; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9773 
// neon_mlal_dup 2x8c4: m=2, n=8, k in {1, 18, 35, 52, 69},
// with ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_val)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9790 
// neon_mlal_dup 2x8c4: m=2, n=8, k in {1, 18, 35, 52, 69}, ks=3,
// a_offset=163, with zero_index taking each value in {0, 1}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9810 
// neon_mlal_dup 2x8c4: single run with m=2, n=8, k=16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9824 
// neon_mlal_dup 2x8c4: single run with m=2, n=8, k=16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9838 
// neon_mlal_dup 2x8c4: single run with m=2, n=8, k=16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9852 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
9853 
9854 
9855 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// neon_mlal_ld1r 2x8c4: single run with m=2, n=8, k=16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9868 
// neon_mlal_ld1r 2x8c4: single run with m=2, n=8, k=16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9882 
// neon_mlal_ld1r 2x8c4: k=16 with every n in 1..8 and m in 1..2;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9900 
// neon_mlal_ld1r 2x8c4: k=16, n=8, with m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9916 
// neon_mlal_ld1r 2x8c4: k=16, m=2, with n in 1..8; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(n_val).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9932 
// neon_mlal_ld1r 2x8c4: m=2, n=8, with every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9947 
// neon_mlal_ld1r 2x8c4: every k in 1..15 crossed with n in 1..8 and
// m in 1..2; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9967 
// neon_mlal_ld1r 2x8c4: m=2, n=8, with every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9982 
// neon_mlal_ld1r 2x8c4: every k in 17..31 crossed with n in 1..8 and
// m in 1..2; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10002 
// neon_mlal_ld1r 2x8c4: m=2, n=8, with k stepping 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10017 
// neon_mlal_ld1r 2x8c4: k stepping 32, 48, ..., 160 crossed with every
// n in 1..8 and m in 1..2; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10037 
// neon_mlal_ld1r 2x8c4: n in 9..15 (above nr=8), m=2, k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10054 
// neon_mlal_ld1r 2x8c4: n in 9..15, m=2, k in {1, 18, 35, 52, 69},
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10072 
// neon_mlal_ld1r 2x8c4: n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..2;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10092 
// neon_mlal_ld1r 2x8c4: n in {16, 24}, m=2, k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10109 
// neon_mlal_ld1r 2x8c4: n in {16, 24}, m=2, k in {1, 18, 35, 52, 69},
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10127 
// neon_mlal_ld1r 2x8c4: n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..2;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10147 
// neon_mlal_ld1r 2x8c4: m=2, n=8, k in {1, 18, 35, 52, 69}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_val)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10163 
// neon_mlal_ld1r 2x8c4: k in {1, 18, 35, 52, 69}, n in 1..8, m in 1..2,
// with ks set to 3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10184 
// neon_mlal_ld1r 2x8c4: n in 9..15, m=2, k in {1, 18, 35, 52, 69},
// with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10202 
// neon_mlal_ld1r 2x8c4: n in {16, 24}, m=2, k in {1, 18, 35, 52, 69},
// with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10220 
// neon_mlal_ld1r 2x8c4: k in {1, 18, 35, 52, 69}, n in 1..8, m in 1..2,
// with cm_stride set to 11; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10241 
// neon_mlal_ld1r 2x8c4: m=2, n=8, k in {1, 18, 35, 52, 69},
// with ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_val)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10258 
// neon_mlal_ld1r 2x8c4: m=2, n=8, k in {1, 18, 35, 52, 69}, ks=3,
// a_offset=163, with zero_index taking each value in {0, 1}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10278 
// neon_mlal_ld1r 2x8c4: single run with m=2, n=8, k=16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10292 
// neon_mlal_ld1r 2x8c4: single run with m=2, n=8, k=16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10306 
// neon_mlal_ld1r 2x8c4: single run with m=2, n=8, k=16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10320 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
10321 
10322 
10323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// neonv8_mlal_dup 2x8c4: single run with m=2, n=8, k=16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10336 
// neonv8_mlal_dup 2x8c4: single run with m=2, n=8, k=16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10350 
// neonv8_mlal_dup 2x8c4: k=16 with every n in 1..8 and m in 1..2;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10368 
// neonv8_mlal_dup 2x8c4: k=16, n=8, with m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10384 
// neonv8_mlal_dup 2x8c4: k=16, m=2, with n in 1..8; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(n_val).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10400 
// neonv8_mlal_dup 2x8c4: m=2, n=8, with every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10415 
// neonv8_mlal_dup 2x8c4: every k in 1..15 crossed with n in 1..8 and
// m in 1..2; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10435 
// Runs the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel with m = 2, n = 8
// for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10450 
// Sweeps k in [17, 32), n in [1, 8] and m in [1, 2] for the 2x8c4
// neonv8_mlal_dup QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470 
// Runs the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel with m = 2, n = 8
// for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10485 
// Sweeps k = 32, 48, ..., 160, n in [1, 8] and m in [1, 2] for the 2x8c4
// neonv8_mlal_dup QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10505 
// Sweeps n in [9, 16) and k in {1, 18, 35, 52, 69} with m = 2 for the 2x8c4
// neonv8_mlal_dup QS8 IGEMM microkernel (n exceeds the nr = 8 setting).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10522 
// Same sweep as n_gt_8 (n in [9, 16), k in {1, 18, 35, 52, 69}, m = 2) for the
// 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10540 
// Sweeps n in [9, 16), k in {1, 18, 35, 52, 69} and m in [1, 2] for the 2x8c4
// neonv8_mlal_dup QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10560 
// Sweeps n in {16, 24} (multiples of 8) and k in {1, 18, 35, 52, 69} with
// m = 2 for the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10577 
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 2) for
// the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10595 
// Sweeps n in {16, 24}, k in {1, 18, 35, 52, 69} and m in [1, 2] for the 2x8c4
// neonv8_mlal_dup QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10615 
// Runs the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel with m = 2, n = 8 and
// ks set to 3 for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10631 
// Sweeps k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2] with ks set to
// 3 for the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel. Each configuration
// is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10652 
// Sweeps n in [9, 16) and k in {1, 18, 35, 52, 69} with m = 2 and ks set to 3
// for the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10670 
// Sweeps n in {16, 24} and k in {1, 18, 35, 52, 69} with m = 2 and ks set to 3
// for the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10688 
// Sweeps k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2] with cm_stride
// set to 11 for the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel. Each
// configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10709 
// Runs the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel with m = 2, n = 8,
// ks set to 3 and a_offset set to 163 for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10726 
// Runs the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel with m = 2, n = 8,
// ks set to 3 and a_offset set to 163, for k in {1, 18, 35, 52, 69} and with
// zero_index set to each mz in {0, 1}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10746 
// Runs the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel once with m = 2, n = 8,
// k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10760 
// Runs the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel once with m = 2, n = 8,
// k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10774 
// Runs the 2x8c4 neonv8_mlal_dup QS8 IGEMM microkernel once with m = 2, n = 8,
// k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10788 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
10789 
10790 
10791 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel once with m = 2,
// n = 8 and k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10804 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel once with m = 2,
// n = 8, k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10818 
// Sweeps n in [1, 8] and m in [1, 2] with k = 16 for the 2x8c4
// neonv8_mlal_ld2r QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10836 
// Sweeps m over [1, 2] with n = 8 and k = 16 for the 2x8c4 neonv8_mlal_ld2r
// QS8 IGEMM microkernel. Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10852 
// Sweeps n over [1, 8] with m = 2 and k = 16 for the 2x8c4 neonv8_mlal_ld2r
// QS8 IGEMM microkernel. Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10868 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel with m = 2, n = 8
// for every k in [1, 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10883 
// Sweeps k in [1, 16), n in [1, 8] and m in [1, 2] for the 2x8c4
// neonv8_mlal_ld2r QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10903 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel with m = 2, n = 8
// for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10918 
// Sweeps k in [17, 32), n in [1, 8] and m in [1, 2] for the 2x8c4
// neonv8_mlal_ld2r QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10938 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel with m = 2, n = 8
// for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10953 
// Sweeps k = 32, 48, ..., 160, n in [1, 8] and m in [1, 2] for the 2x8c4
// neonv8_mlal_ld2r QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10973 
// Sweeps n in [9, 16) and k in {1, 18, 35, 52, 69} with m = 2 for the 2x8c4
// neonv8_mlal_ld2r QS8 IGEMM microkernel (n exceeds the nr = 8 setting).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10990 
// Same sweep as n_gt_8 (n in [9, 16), k in {1, 18, 35, 52, 69}, m = 2) for the
// 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11008 
// Sweeps n in [9, 16), k in {1, 18, 35, 52, 69} and m in [1, 2] for the 2x8c4
// neonv8_mlal_ld2r QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11028 
// Sweeps n in {16, 24} (multiples of 8) and k in {1, 18, 35, 52, 69} with
// m = 2 for the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11045 
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 2) for
// the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11063 
// Sweeps n in {16, 24}, k in {1, 18, 35, 52, 69} and m in [1, 2] for the 2x8c4
// neonv8_mlal_ld2r QS8 IGEMM microkernel. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11083 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel with m = 2, n = 8 and
// ks set to 3 for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
11099 
// Sweeps k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2] with ks set to
// 3 for the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel. Each configuration
// is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11120 
// Sweeps n in [9, 16) and k in {1, 18, 35, 52, 69} with m = 2 and ks set to 3
// for the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11138 
// Sweeps n in {16, 24} and k in {1, 18, 35, 52, 69} with m = 2 and ks set to 3
// for the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11156 
// Sweeps k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2] with cm_stride
// set to 11 for the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel. Each
// configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11177 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel with m = 2, n = 8,
// ks set to 3 and a_offset set to 163 for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
11194 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel with m = 2, n = 8,
// ks set to 3 and a_offset set to 163, for k in {1, 18, 35, 52, 69} and with
// zero_index set to each mz in {0, 1}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11214 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel once with m = 2,
// n = 8, k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11228 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel once with m = 2,
// n = 8, k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11242 
// Runs the 2x8c4 neonv8_mlal_ld2r QS8 IGEMM microkernel once with m = 2,
// n = 8, k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11256 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
11257 
11258 
11259 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 2x8c4s2 neon_mlal QS8 IGEMM microkernel once with m = 2, n = 8
// and k = 16 (tester configured with kr = 4, sr = 2).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(2)
    .m(2)
    .n(8)
    .k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11272 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cn) {
  // m=2, n=8, k=16 with cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11286 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile) {
  // k=16 for every n in 1..8 (outer) and m in 1..2 (inner); iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11304 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  // n=8, k=16 for every m in 1..2; iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11320 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  // m=2, k=16 for every n in 1..8; iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11336 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16) {
  // m=2, n=8 for every k in 1..15.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11351 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16_subtile) {
  // Every k in 1..15, n in 1..8, m in 1..2 (nested in that order); iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11371 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16) {
  // m=2, n=8 for every k in 17..31.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11386 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16_subtile) {
  // Every k in 17..31, n in 1..8, m in 1..2 (nested in that order); iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11406 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16) {
  // m=2, n=8 for k = 32, 48, ..., 160 (step 16).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11421 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16_subtile) {
  // k = 32..160 step 16, then n in 1..8, then m in 1..2; iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11441 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8) {
  // m=2 for every n in 9..15 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11458 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
  // m=2, cn_stride(11) for every n in 9..15 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11476 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_subtile) {
  // n in 9..15, then k = 1..80 step 17, then m in 1..2; iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11496 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8) {
  // m=2 for n = 16, 24 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11513 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
  // m=2, cn_stride(11) for n = 16, 24 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11531 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_subtile) {
  // n = 16, 24, then k = 1..80 step 17, then m in 1..2; iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11551 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, small_kernel) {
  // m=2, n=8, ks(3) for k = 1..80 step 17.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11567 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, small_kernel_subtile) {
  // ks(3), iterations(1) for k = 1..80 step 17, then n in 1..8, then m in 1..2.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11588 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
  // m=2, ks(3) for every n in 9..15 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11606 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
  // m=2, ks(3) for n = 16, 24 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11624 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm_subtile) {
  // cm_stride(11), iterations(1) for k = 1..80 step 17, then n in 1..8, then m in 1..2.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11645 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, a_offset) {
  // m=2, n=8, ks(3), a_offset(163) for k = 1..80 step 17.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11662 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, zero) {
  // ks(3), a_offset(163) for k = 1..80 step 17 (outer) and zero_index 0..1 (inner).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11682 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmin) {
  // m=2, n=8, k=16 with qmin set to 128.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11696 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmax) {
  // m=2, n=8, k=16 with qmax set to 128.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11710 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm) {
  // m=2, n=8, k=16 with cm_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11724 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
11725 
11726 
11727 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16) {
  // Single run with m=2, n=8, k=16.
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11740 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cn) {
  // m=2, n=8, k=16 with cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11754 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  // k=16 for every n in 1..8 (outer) and m in 1..2 (inner); iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11772 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  // n=8, k=16 for every m in 1..2; iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11788 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  // m=2, k=16 for every n in 1..8; iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11804 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16) {
  // m=2, n=8 for every k in 1..15.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11819 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  // Every k in 1..15, n in 1..8, m in 1..2 (nested in that order); iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11839 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16) {
  // m=2, n=8 for every k in 17..31.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11854 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  // Every k in 17..31, n in 1..8, m in 1..2 (nested in that order); iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11874 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16) {
  // m=2, n=8 for k = 32, 48, ..., 160 (step 16).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11889 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  // k = 32..160 step 16, then n in 1..8, then m in 1..2; iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11909 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8) {
  // m=2 for every n in 9..15 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11926 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  // m=2, cn_stride(11) for every n in 9..15 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11944 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  // n in 9..15, then k = 1..80 step 17, then m in 1..2; iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11964 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8) {
  // m=2 for n = 16, 24 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11981 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  // m=2, cn_stride(11) for n = 16, 24 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11999 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  // n = 16, 24, then k = 1..80 step 17, then m in 1..2; iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12019 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, small_kernel) {
  // m=2, n=8, ks(3) for k = 1..80 step 17.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12035 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
  // ks(3), iterations(1) for k = 1..80 step 17, then n in 1..8, then m in 1..2.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12056 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
  // m=2, ks(3) for every n in 9..15 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12074 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
  // m=2, ks(3) for n = 16, 24 (outer) and k = 1..80 step 17 (inner).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12092 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  // cm_stride(11), iterations(1) for k = 1..80 step 17, then n in 1..8, then m in 1..2.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12113 
// 2x8c4s2 NEONv8 MLAL kernel: m = 2, n = 8, k in {1, 18, 35, 52, 69}, with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12130 
// 2x8c4s2 NEONv8 MLAL kernel: m = 2, n = 8, k in {1, 18, 35, 52, 69}, with ks(3), a_offset(163)
// and zero_index taking the values 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12150 
// 2x8c4s2 NEONv8 MLAL kernel: m = 2, n = 8, k = 16, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12164 
// 2x8c4s2 NEONv8 MLAL kernel: m = 2, n = 8, k = 16, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12178 
// 2x8c4s2 NEONv8 MLAL kernel: m = 2, n = 8, k = 16, with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12192 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
12193 
12194 
12195 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12208 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k = 16, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12222 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: k = 16, n in 1..8, m = 1 (the m loop has a single
// value), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12240 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: k = 16, n = 8, m = 1 (the m loop has a single
// value), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12256 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: k = 16, m = 1, n in 1..8, one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12272 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12287 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: k in 1..15, n in 1..8, m = 1 (single-value loop),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12307 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12322 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: k in 17..31, n in 1..8, m = 1 (single-value loop),
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12342 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k from 32 to 160 in steps of 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12357 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: k from 32 to 160 in steps of 16, n in 1..8,
// m = 1 (single-value loop), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12377 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n in 9..15, k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12394 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n in 9..15, k in {1, 18, 35, 52, 69},
// with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12412 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: n in 9..15, k in {1, 18, 35, 52, 69},
// m = 1 (single-value loop), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12432 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n in {16, 24}, k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12449 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n in {16, 24}, k in {1, 18, 35, 52, 69},
// with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12467 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: n in {16, 24}, k in {1, 18, 35, 52, 69},
// m = 1 (single-value loop), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12487 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k in {1, 18, 35, 52, 69}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12503 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: k in {1, 18, 35, 52, 69}, n in 1..8,
// m = 1 (single-value loop), with ks(3) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12524 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n in 9..15, k in {1, 18, 35, 52, 69},
// with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12542 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n in {16, 24}, k in {1, 18, 35, 52, 69},
// with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12560 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: k in {1, 18, 35, 52, 69}, n in 1..8,
// m = 1 (single-value loop), with cm_stride(11) and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12581 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k in {1, 18, 35, 52, 69},
// with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12598 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k in {1, 18, 35, 52, 69},
// with ks(3), a_offset(83) and zero_index 0 (the zero-index loop has a single value).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12618 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k = 16, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12632 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k = 16, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12646 
// 1x8c8 AArch64 NEON MLAL (Cortex-A53) kernel: m = 1, n = 8, k = 16, with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12660 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
12661 
12662 
12663 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12676 
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n = 8, k = 16, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12690 
// 2x8c16 AArch64 NEON MLAL kernel: k = 16, every (m, n) with n in 1..8 and m in 1..2,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12708 
// 2x8c16 AArch64 NEON MLAL kernel: k = 16, n = 8, m in 1..2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12724 
// 2x8c16 AArch64 NEON MLAL kernel: k = 16, m = 2, n in 1..8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12740 
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n = 8, k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12755 
// 2x8c16 AArch64 NEON MLAL kernel: k in 1..15, n in 1..8, m in 1..2,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12775 
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n = 8, k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12790 
// 2x8c16 AArch64 NEON MLAL kernel: k in 17..31, n in 1..8, m in 1..2,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12810 
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n = 8, k from 32 to 160 in steps of 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12825 
// 2x8c16 AArch64 NEON MLAL kernel: k from 32 to 160 in steps of 16, n in 1..8, m in 1..2,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12845 
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n in 9..15, k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12862 
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n in 9..15, k in {1, 18, 35, 52, 69},
// with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12880 
// 2x8c16 AArch64 NEON MLAL kernel: n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..2,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12900 
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n in {16, 24}, k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12917 
// 2x8c16 AArch64 NEON MLAL kernel: m = 2, n in {16, 24}, k in {1, 18, 35, 52, 69},
// with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12935 
// n_div_8_subtile: sweeps n = 16, 24, k = 1, 18, 35, 52, 69 and m = 1..2 for the
// 2x8c16 aarch64_neon_mlal QS8 IGEMM microkernel, with iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(16)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12955 
// small_kernel: sweeps k = 1, 18, 35, 52, 69 on the full 2x8 tile (m=2, n=8)
// with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(16)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12971 
// small_kernel_subtile: sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2
// with ks(3) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(16)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12992 
// n_gt_8_small_kernel: sweeps n = 9..15 and k = 1, 18, 35, 52, 69 with m = 2
// and ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(16)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13010 
// n_div_8_small_kernel: sweeps n = 16, 24 and k = 1, 18, 35, 52, 69 with m = 2
// and ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(16)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13028 
// strided_cm_subtile: sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2
// with cm_stride(11) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(16)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13049 
// a_offset: sweeps k = 1, 18, 35, 52, 69 on the full 2x8 tile (m=2, n=8)
// with ks(3) and a_offset(163) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(16)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13066 
// zero: sweeps k = 1, 18, 35, 52, 69 and mz = 0..1 on the full 2x8 tile
// (m=2, n=8) with ks(3), a_offset(163) and zero_index(mz) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(16)
        .sr(1)
        .m(2)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13086 
// qmin: single case on the full 2x8 tile (m=2, n=8, k=16) with qmin(128)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(16)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13100 
// qmax: single case on the full 2x8 tile (m=2, n=8, k=16) with qmax(128)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(16)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13114 
// strided_cm: single case on the full 2x8 tile (m=2, n=8, k=16) with
// cm_stride(11) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(16)
    .sr(1)
    .m(2)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13128 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
13129 
13130 
13131 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_8: single case on the full 4x16 tile (m=4, n=16, k=8) for the
// 4x16 aarch64_neon_mlal_lane_prfm_cortex_a53 QS8 IGEMM microkernel
// (mr=4, nr=16, kr=1, sr=1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13144 
// strided_cn: single case on the full 4x16 tile (m=4, n=16, k=8) with
// cn_stride(19) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13158 
// k_eq_8_subtile: sweeps n = 1..16 and m = 1..4 at k = 8, with iterations(1)
// per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13176 
// k_eq_8_subtile_m: sweeps m = 1..4 at n = 16, k = 8, with iterations(1)
// per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13192 
// k_eq_8_subtile_n: sweeps n = 1..16 at m = 4, k = 8, with iterations(1)
// per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13208 
// k_lt_8: sweeps k = 1..7 on the full 4x16 tile (m=4, n=16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13223 
// k_lt_8_subtile: sweeps k = 1..7, n = 1..16 and m = 1..4, with iterations(1)
// per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13243 
// k_gt_8: sweeps k = 9..15 on the full 4x16 tile (m=4, n=16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13258 
// k_gt_8_subtile: sweeps k = 9..15, n = 1..16 and m = 1..4, with iterations(1)
// per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13278 
// k_div_8: sweeps k = 16, 24, ..., 80 (step 8) on the full 4x16 tile
// (m=4, n=16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13293 
// k_div_8_subtile: sweeps k = 16, 24, ..., 80 (step 8), n = 1..16 and
// m = 1..4, with iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13313 
// n_gt_16: sweeps n = 17..31 and k = 1, 10, 19, 28, 37 with m fixed at 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13330 
// n_gt_16_strided_cn: sweeps n = 17..31 and k = 1, 10, 19, 28, 37 with m = 4
// and cn_stride(19) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13348 
// n_gt_16_subtile: sweeps n = 17..31, k = 1, 10, 19, 28, 37 and m = 1..4,
// with iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13368 
// n_div_16: sweeps n = 32, 48 and k = 1, 10, 19, 28, 37 with m fixed at 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13385 
// n_div_16_strided_cn: sweeps n = 32, 48 and k = 1, 10, 19, 28, 37 with m = 4
// and cn_stride(19) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13403 
// n_div_16_subtile: sweeps n = 32, 48, k = 1, 10, 19, 28, 37 and m = 1..4,
// with iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13423 
// small_kernel: sweeps k = 1, 10, 19, 28, 37 on the full 4x16 tile
// (m=4, n=16) with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13439 
// small_kernel_subtile: sweeps k = 1, 10, 19, 28, 37, n = 1..16 and m = 1..4
// with ks(3) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13460 
// n_gt_16_small_kernel: sweeps n = 17..31 and k = 1, 10, 19, 28, 37 with
// m = 4 and ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13478 
// n_div_16_small_kernel: sweeps n = 32, 48 and k = 1, 10, 19, 28, 37 with
// m = 4 and ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13496 
// strided_cm_subtile: sweeps k = 1, 10, 19, 28, 37, n = 1..16 and m = 1..4
// with cm_stride(19) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13517 
// a_offset: sweeps k = 1, 10, 19, 28, 37 on the full 4x16 tile (m=4, n=16)
// with ks(3) and a_offset(163) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13534 
// zero: sweeps k = 1, 10, 19, 28, 37 and mz = 0..3 on the full 4x16 tile
// (m=4, n=16) with ks(3), a_offset(163) and zero_index(mz) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13554 
// qmin: single case on the full 4x16 tile (m=4, n=16, k=8) with qmin(128)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13568 
// qmax: single case on the full 4x16 tile (m=4, n=16, k=8) with qmax(128)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13582 
// strided_cm: single case on the full 4x16 tile (m=4, n=16, k=8) with
// cm_stride(19) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13596 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
13597 
13598 
13599 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_16: single case on the full 4x16 tile (m=4, n=16, k=16) for the
// 4x16c4 aarch64_neondot_cortex_a55 QS8 IGEMM microkernel
// (mr=4, nr=16, kr=4, sr=1).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13612 
// strided_cn: single case on the full 4x16 tile (m=4, n=16, k=16) with
// cn_stride(19) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13626 
// k_eq_16_subtile: sweeps n = 1..16 and m = 1..4 at k = 16, with
// iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13644 
// k_eq_16_subtile_m: sweeps m = 1..4 at n = 16, k = 16, with iterations(1)
// per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13660 
// k_eq_16_subtile_n: sweeps n = 1..16 at m = 4, k = 16, with iterations(1)
// per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13676 
// k_lt_16: sweeps k = 1..15 on the full 4x16 tile (m=4, n=16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13691 
// k_lt_16_subtile: sweeps k = 1..15, n = 1..16 and m = 1..4, with
// iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13711 
// Runs m=4, n=16 for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13726 
// For every k in [17, 32): sweeps n over 1..16 and m over 1..4, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13746 
// Runs m=4, n=16 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13761 
// For k = 32..160 step 16: sweeps n over 1..16 and m over 1..4, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13781 
// For every n in [17, 32): runs m=4 with k = 1, 18, 35, 52, 69 (step 17 up to 80).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13798 
// Same n in [17, 32) and k (step 17 up to 80) sweep at m=4, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13816 
// For n in [17, 32) and k stepping by 17 up to 80: sweeps m over 1..4, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13836 
// For n = 32 and 48: runs m=4 with k stepping by 17 from 1 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13853 
// For n = 32 and 48 and k stepping by 17 up to 80: runs m=4 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13871 
// For n = 32 and 48 and k stepping by 17 up to 80: sweeps m over 1..4, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13891 
// Runs m=4, n=16 with ks=3 for k stepping by 17 from 1 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13907 
// With ks=3 and k stepping by 17 up to 80: sweeps n over 1..16 and m over 1..4, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13928 
// For n in [17, 32) and k stepping by 17 up to 80: runs m=4 with ks=3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13946 
// For n = 32 and 48 and k stepping by 17 up to 80: runs m=4 with ks=3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13964 
// With cm_stride=19 and k stepping by 17 up to 80: sweeps n over 1..16 and m over 1..4, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13985 
// Runs m=4, n=16 with ks=3 and a_offset=331 for k stepping by 17 from 1 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(331)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14002 
// With ks=3 and a_offset=331, tries zero_index 0..3 for k stepping by 17 from 1 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(4).n(16).k(k_size)
          .ks(3)
          .a_offset(331)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14022 
// Single m=4, n=16, k=16 run with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14036 
// Single m=4, n=16, k=16 run with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14050 
// Single m=4, n=16, k=16 run with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .cm_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14064 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
14065 
14066 
14067 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single m=1, n=8, k=8 run of the 1x8c4 NEON-dot kernel.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14080 
// Single m=1, n=8, k=8 run with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14094 
// Sweeps n over 1..8 and m over 1..1 at k=8; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14112 
// Sweeps m over 1..1 with n=8 and k=8; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14128 
// Sweeps n over 1..8 with m=1 and k=8; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14144 
// Runs m=1, n=8 for every k in [1, 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14159 
// For every k in [1, 8): sweeps n over 1..8 and m over 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14179 
// Runs m=1, n=8 for every k in [9, 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14194 
// For every k in [9, 16): sweeps n over 1..8 and m over 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14214 
// Runs m=1, n=8 for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14229 
// For k = 16..80 step 8: sweeps n over 1..8 and m over 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14249 
// For every n in [9, 16): runs m=1 with k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14266 
// For n in [9, 16) and k stepping by 9 up to 40: runs m=1 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14284 
// For n in [9, 16) and k stepping by 9 up to 40: sweeps m over 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14304 
// For n = 16 and 24: runs m=1 with k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14321 
// For n = 16 and 24 and k stepping by 9 up to 40: runs m=1 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14339 
// For n = 16 and 24 and k stepping by 9 up to 40: sweeps m over 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14359 
// Runs m=1, n=8 with ks=3 for k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14375 
// With ks=3 and k stepping by 9 up to 40: sweeps n over 1..8 and m over 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14396 
// For n in [9, 16) and k stepping by 9 up to 40: runs m=1 with ks=3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14414 
// For n = 16 and 24 and k stepping by 9 up to 40: runs m=1 with ks=3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14432 
// With cm_stride=11 and k stepping by 9 up to 40: sweeps n over 1..8 and m over 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14453 
// Runs m=1, n=8 with ks=3 and a_offset=43 for k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14470 
// With ks=3 and a_offset=43, tries zero_index 0 (the only value below 1) for k stepping by 9 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14490 
// Single m=1, n=8, k=8 run with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14504 
// Single m=1, n=8, k=8 run with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14518 
// Single m=1, n=8, k=8 run with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14532 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
14533 
14534 
14535 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 1x8c8 NEONv8 MLAL kernel on the full 1x8 tile with k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14548 
// Single run on the full 1x8 tile with k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14562 
// Sweeps n over 1..8 and m over 1..1 with k = 16; iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(m).n(n).k(16);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14580 
// Sweeps m over 1..1 with n = 8 and k = 16; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(m).n(8).k(16);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14596 
// Sweeps n over 1..8 with m = 1 and k = 16; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(n).k(16);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14612 
// Runs the full 1x8 tile for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14627 
// For every k in 1..15, sweeps n over 1..8 and m over 1..1; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14647 
// Runs the full 1x8 tile for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14662 
// For every k in 17..31, sweeps n over 1..8 and m over 1..1; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14682 
// Runs the full 1x8 tile for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14697 
// For k = 32..160 in steps of 16, sweeps n over 1..8 and m over 1..1; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14717 
// For every n in 9..15, runs m = 1 with k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14734 
// For every n in 9..15, runs m = 1 with k = 1..80 in steps of 17 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14752 
// For every n in 9..15 and k = 1..80 in steps of 17, sweeps m over 1..1; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14772 
// For n = 16 and 24, runs m = 1 with k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14789 
// For n = 16 and 24, runs m = 1 with k = 1..80 in steps of 17 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14807 
// For n = 16 and 24 and k = 1..80 in steps of 17, sweeps m over 1..1; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14827 
// Runs the full 1x8 tile with ks set to 3 for k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14843 
// With ks set to 3: for k = 1..80 in steps of 17, sweeps n over 1..8 and m over 1..1; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14864 
// With ks set to 3: for every n in 9..15, runs m = 1 with k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14882 
// With ks set to 3: for n = 16 and 24, runs m = 1 with k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14900 
// With cm_stride set to 11: for k = 1..80 in steps of 17, sweeps n over 1..8 and m over 1..1; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14921 
// Runs the full 1x8 tile with ks set to 3 and a_offset set to 83 for k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.ks(3).a_offset(83);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14938 
// With ks = 3 and a_offset = 83: for k = 1..80 in steps of 17, runs once per zero_index mz in 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(8).k(k);
      tester.ks(3).a_offset(83).zero_index(mz);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14958 
// Single run on the full 1x8 tile with k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14972 
// Single run on the full 1x8 tile with k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14986 
// Single run on the full 1x8 tile with k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15000 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15001 
15002 
15003 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x8c8 NEONv8 MLAL kernel on the full 2x8 tile with k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15016 
// Single run on the full 2x8 tile with k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15030 
// Sweeps n over 1..8 and m over 1..2 with k = 16; iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(m).n(n).k(16);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15048 
// Sweeps m over 1..2 with n = 8 and k = 16; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(m).n(8).k(16);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15064 
// Sweeps n over 1..8 with m = 2 and k = 16; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(n).k(16);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15080 
// Runs the full 2x8 tile for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15095 
// For every k in 1..15, sweeps n over 1..8 and m over 1..2; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15115 
// Runs the full 2x8 tile for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15130 
// For every k in 17..31, sweeps n over 1..8 and m over 1..2; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15150 
// Runs the full 2x8 tile for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15165 
// For k = 32..160 in steps of 16, sweeps n over 1..8 and m over 1..2; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15185 
// For every n in 9..15, runs m = 2 with k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15202 
// For every n in 9..15, runs m = 2 with k = 1..80 in steps of 17 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15220 
// For every n in 9..15 and k = 1..80 in steps of 17, sweeps m over 1..2; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15240 
// For n = 16 and 24, runs m = 2 with k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15257 
// For n = 16 and 24, runs m = 2 with k = 1..80 in steps of 17 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15275 
// For n = 16 and 24 and k = 1..80 in steps of 17, sweeps m over 1..2; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15295 
// Runs the full 2x8 tile with ks set to 3 for k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15311 
// With ks set to 3: for k = 1..80 in steps of 17, sweeps n over 1..8 and m over 1..2; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15332 
// With ks set to 3: for every n in 9..15, runs m = 2 with k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15350 
// With ks set to 3: for n = 16 and 24, runs m = 2 with k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15368 
// With cm_stride set to 11: for k = 1..80 in steps of 17, sweeps n over 1..8 and m over 1..2; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15389 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 2x8 problem for k in 1, 18, 35, 52, 69 with ks(3) and a_offset(163).
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15406 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Same setup as the a_offset case, additionally passing each zero_index in 0..1.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15426 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full 2x8 run at k = 16 with qmin(128).
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15440 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full 2x8 run at k = 16 with qmax(128).
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15454 
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full 2x8 run at k = 16 with cm_stride(11).
  auto tester = GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15468 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15469 
15470 
15471 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single full 4x16 run at k = 8.
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15484 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single full 4x16 run at k = 8 with cn_stride(19).
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15498 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every (n, m) with n in 1..16 and m in 1..4 at k = 8, iterations(1).
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15516 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // m in 1..4 with n fixed at 16 and k = 8, iterations(1).
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15532 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // n in 1..16 with m fixed at 4 and k = 8, iterations(1).
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15548 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 problem for every k in 1..7.
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15563 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For every k in 1..7: every (n, m) with n in 1..16 and m in 1..4, iterations(1).
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15583 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 problem for every k in 9..15.
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15598 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For every k in 9..15: every (n, m) with n in 1..16 and m in 1..4, iterations(1).
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15618 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 problem for k = 16, 24, ..., 80 (step 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15633 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For k = 16, 24, ..., 80: every (n, m) with n in 1..16 and m in 1..4, iterations(1).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15653 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep n over 17..31 and k over 1, 10, 19, 28, 37 with m fixed at 4.
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15670 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep n over 17..31 and k over 1, 10, 19, 28, 37 with m fixed at 4 and cn_stride(19).
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15688 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For n in 17..31 and k in 1, 10, 19, 28, 37: every m in 1..4, iterations(1).
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15708 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep n over 32 and 48 and k over 1, 10, 19, 28, 37 with m fixed at 4.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15725 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep n over 32 and 48 and k over 1, 10, 19, 28, 37 with m fixed at 4 and cn_stride(19).
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15743 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For n in 32, 48 and k in 1, 10, 19, 28, 37: every m in 1..4, iterations(1).
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15763 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 problem for k in 1, 10, 19, 28, 37 with ks(3).
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15779 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For k in 1, 10, 19, 28, 37: every (n, m) with n in 1..16 and m in 1..4,
  // using ks(3) and iterations(1).
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15800 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep n over 17..31 and k over 1, 10, 19, 28, 37 with m fixed at 4 and ks(3).
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15818 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep n over 32 and 48 and k over 1, 10, 19, 28, 37 with m fixed at 4 and ks(3).
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15836 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For k in 1, 10, 19, 28, 37: every (n, m) with n in 1..16 and m in 1..4,
  // using cm_stride(19) and iterations(1).
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15857 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 problem for k in 1, 10, 19, 28, 37 with ks(3) and a_offset(163).
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
15874 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Same setup as the a_offset case, additionally passing each zero_index in 0..3.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
15894 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single full 4x16 run at k = 8 with qmin(128).
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15908 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single full 4x16 run at k = 8 with qmax(128).
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15922 
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single full 4x16 run at k = 8 with cm_stride(19).
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15936 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15937 
15938 
15939 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single full 3x4 run at k = 8.
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
15952 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single full 3x4 run at k = 8 with cn_stride(7).
  auto tester = GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
15966 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every (n, m) with n in 1..4 and m in 1..3 at k = 8, iterations(1).
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
15984 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // m in 1..3 with n fixed at 4 and k = 8, iterations(1).
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
16000 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // n in 1..4 with m fixed at 3 and k = 8, iterations(1).
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
16016 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 3x4 problem for every k in 1..7.
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
16031 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For every k in 1..7: every (n, m) with n in 1..4 and m in 1..3, iterations(1).
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16051 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 3x4 problem for every k in 9..15.
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
16066 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For every k in 9..15: every (n, m) with n in 1..4 and m in 1..3, iterations(1).
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16086 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 3x4 problem for k = 16, 24, ..., 80 (step 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
16101 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For k = 16, 24, ..., 80: every (n, m) with n in 1..4 and m in 1..3, iterations(1).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16121 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 5..7 and k over 1, 10, 19, 28, 37 with m fixed at 3.
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
16138 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 5..7 and k over 1, 10, 19, 28, 37 with m fixed at 3 and cn_stride(7).
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
16156 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For n in 5..7 and k in 1, 10, 19, 28, 37: every m in 1..3, iterations(1).
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16176 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
16193 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 3 and cn_stride(7).
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
16211 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For n in 8, 12 and k in 1, 10, 19, 28, 37: every m in 1..3, iterations(1).
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // Full 3x4 problem for k in 1, 10, 19, 28, 37 with ks(3).
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
16247 
// small_kernel_subtile: runs the 3x4c2 SSE2 ld64 kernel with ks(3) over
// k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16268 
// n_gt_4_small_kernel: runs the 3x4c2 SSE2 ld64 kernel with ks(3), m=3,
// n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16286 
// n_div_4_small_kernel: runs the 3x4c2 SSE2 ld64 kernel with ks(3), m=3,
// n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16304 
// strided_cm_subtile: runs the 3x4c2 SSE2 ld64 kernel with cm_stride(7) over
// k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16325 
// a_offset: runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 tile with ks(3) and
// a_offset(127) for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).ks(3).a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
16342 
// zero: runs the 3x4c2 SSE2 ld64 kernel with ks(3) and a_offset(127), passing
// each zero_index in 0..2 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_dim).ks(3);
      tester.a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16362 
// qmin: runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 tile, k=8, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
16376 
// qmax: runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 tile, k=8, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
16390 
// strided_cm: runs the 3x4c2 SSE2 ld64 kernel on a full 3x4 tile, k=8, with
// cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
16404 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16405 
16406 
16407 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile with k=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16420 
// strided_cn: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile, k=8, with
// cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16434 
// k_eq_8_subtile: runs the 3x4c2 SSE4.1 ld64 kernel with k=8 for every
// n in 1..4 and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16452 
// k_eq_8_subtile_m: runs the 3x4c2 SSE4.1 ld64 kernel with n=4, k=8 for every
// m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16468 
// k_eq_8_subtile_n: runs the 3x4c2 SSE4.1 ld64 kernel with m=3, k=8 for every
// n in 1..4, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16484 
// k_lt_8: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile for k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16499 
// k_lt_8_subtile: runs the 3x4c2 SSE4.1 ld64 kernel for k in 1..7, n in 1..4
// and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16519 
// k_gt_8: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile for k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16534 
// k_gt_8_subtile: runs the 3x4c2 SSE4.1 ld64 kernel for k in 9..15, n in 1..4
// and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16554 
// k_div_8: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile for
// k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16569 
// k_div_8_subtile: runs the 3x4c2 SSE4.1 ld64 kernel for k = 16, 24, ..., 80,
// n in 1..4 and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16589 
// n_gt_4: runs the 3x4c2 SSE4.1 ld64 kernel with m=3, n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16606 
// n_gt_4_strided_cn: runs the 3x4c2 SSE4.1 ld64 kernel with m=3, n in 5..7,
// k in {1, 10, 19, 28, 37} and cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16624 
// n_gt_4_subtile: runs the 3x4c2 SSE4.1 ld64 kernel for n in 5..7,
// k in {1, 10, 19, 28, 37} and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16644 
// n_div_4: runs the 3x4c2 SSE4.1 ld64 kernel with m=3, n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16661 
// n_div_4_strided_cn: runs the 3x4c2 SSE4.1 ld64 kernel with m=3,
// n in {8, 12}, k in {1, 10, 19, 28, 37} and cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16679 
// n_div_4_subtile: runs the 3x4c2 SSE4.1 ld64 kernel for n in {8, 12},
// k in {1, 10, 19, 28, 37} and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16699 
// small_kernel: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile with
// ks(3) for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16715 
// small_kernel_subtile: runs the 3x4c2 SSE4.1 ld64 kernel with ks(3) over
// k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16736 
// n_gt_4_small_kernel: runs the 3x4c2 SSE4.1 ld64 kernel with ks(3), m=3,
// n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16754 
// n_div_4_small_kernel: runs the 3x4c2 SSE4.1 ld64 kernel with ks(3), m=3,
// n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_dim).k(k_dim).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16772 
// strided_cm_subtile: runs the 3x4c2 SSE4.1 ld64 kernel with cm_stride(7) over
// k in {1, 10, 19, 28, 37}, n in 1..4 and m in 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16793 
// a_offset: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile with ks(3)
// and a_offset(127) for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_dim).ks(3).a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16810 
// zero: runs the 3x4c2 SSE4.1 ld64 kernel with ks(3) and a_offset(127),
// passing each zero_index in 0..2 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_dim).ks(3);
      tester.a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16830 
// qmin: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile, k=8, with
// qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16844 
// qmax: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile, k=8, with
// qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16858 
// strided_cm: runs the 3x4c2 SSE4.1 ld64 kernel on a full 3x4 tile, k=8, with
// cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16872 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
16873 
16874 
16875 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: runs the 2x4c2 AVX ld64 kernel on a full 2x4 tile with k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16888 
// strided_cn: runs the 2x4c2 AVX ld64 kernel on a full 2x4 tile, k=8, with
// cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16902 
// k_eq_8_subtile: runs the 2x4c2 AVX ld64 kernel with k=8 for every n in 1..4
// and m in 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16920 
// k_eq_8_subtile_m: runs the 2x4c2 AVX ld64 kernel with n=4, k=8 for every
// m in 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m_dim).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16936 
// k_eq_8_subtile_n: runs the 2x4c2 AVX ld64 kernel with m=2, k=8 for every
// n in 1..4, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16952 
// k_lt_8: runs the 2x4c2 AVX ld64 kernel on a full 2x4 tile for k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16967 
// k_lt_8_subtile: runs the 2x4c2 AVX ld64 kernel for k in 1..7, n in 1..4 and
// m in 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16987 
// k_gt_8: runs the 2x4c2 AVX ld64 kernel on a full 2x4 tile for k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17002 
// k_gt_8_subtile: runs the 2x4c2 AVX ld64 kernel for k in 9..15, n in 1..4 and
// m in 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17022 
// k_div_8: runs the 2x4c2 AVX ld64 kernel on a full 2x4 tile for
// k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17037 
// k_div_8_subtile: runs the 2x4c2 AVX ld64 kernel for k = 16, 24, ..., 80,
// n in 1..4 and m in 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17057 
// n_gt_4: runs the 2x4c2 AVX ld64 kernel with m=2, n in 5..7 and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17074 
// n_gt_4_strided_cn: runs the 2x4c2 AVX ld64 kernel with m=2, n in 5..7,
// k in {1, 10, 19, 28, 37} and cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17092 
// n_gt_4_subtile: runs the 2x4c2 AVX ld64 kernel for n in 5..7,
// k in {1, 10, 19, 28, 37} and m in 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17112 
// Runs n = 8 and n = 12 against k in {1, 10, 19, 28, 37}; m stays at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17129 
// Runs n = 8 and n = 12 against k in {1, 10, 19, 28, 37} with cn_stride set
// to 7; m stays at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17147 
// Runs n = 8 and n = 12 against k in {1, 10, 19, 28, 37} and m in 1..2, with a
// single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17167 
// Sweeps k over {1, 10, 19, 28, 37} with ks set to 3; m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17183 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks set
// to 3, running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17204 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with ks set to 3; m stays
// at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17222 
// Runs n = 8 and n = 12 against k in {1, 10, 19, 28, 37} with ks set to 3;
// m stays at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17240 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with
// cm_stride set to 7, running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17261 
// Sweeps k over {1, 10, 19, 28, 37} with ks = 3 and a_offset = 83; m = 2 and
// n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17278 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..1 with ks = 3 and
// a_offset = 83; m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17298 
// Single run with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17312 
// Single run with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17326 
// Single run with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17340 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17341 
17342 
17343 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 2, n = 4 and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17356 
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17370 
// Sweeps n over 1..4 and m over 1..2 at k = 8, running a single iteration per
// combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17388 
// Sweeps m over 1..2 with n = 4 and k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size < 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17404 
// Sweeps n over 1..4 with m = 2 and k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17420 
// Sweeps k over 1..7 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17435 
// Sweeps k over 1..7, n over 1..4 and m over 1..2, running a single iteration
// per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17455 
// Sweeps k over 9..15 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17470 
// Sweeps k over 9..15, n over 1..4 and m over 1..2, running a single iteration
// per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17490 
// Sweeps k over 16, 24, ..., 80 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17505 
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..2, running a single
// iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17525 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37}; m stays at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17542 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with cn_stride set to 7;
// m stays at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17560 
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2, running a
// single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17580 
// Runs n = 8 and n = 12 against k in {1, 10, 19, 28, 37}; m stays at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17597 
// Runs n = 8 and n = 12 against k in {1, 10, 19, 28, 37} with cn_stride set
// to 7; m stays at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17615 
// Runs n = 8 and n = 12 against k in {1, 10, 19, 28, 37} and m in 1..2, with a
// single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17635 
// Sweeps k over {1, 10, 19, 28, 37} with ks set to 3; m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17651 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks set
// to 3, running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with ks set to 3; m stays
// at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17690 
// Runs n = 8 and n = 12 against k in {1, 10, 19, 28, 37} with ks set to 3;
// m stays at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17708 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with
// cm_stride set to 7, running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17729 
// Sweeps k over {1, 10, 19, 28, 37} with ks = 3 and a_offset = 83; m = 2 and
// n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17746 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..1 with ks = 3 and
// a_offset = 83; m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17766 
// Single run with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17780 
// Single run with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17794 
// Single run with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17808 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
17809 
17810 
17811 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 3, n = 4 and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17824 
// Single run with m = 3, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
17838 
// Sweeps n over 1..4 and m over 1..3 at k = 8, running a single iteration per
// combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17856 
// Sweeps m over 1..3 with n = 4 and k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size < 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17872 
// Sweeps n over 1..4 with m = 3 and k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17888 
// Sweeps k over 1..7 with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17903 
// Sweeps k over 1..7, n over 1..4 and m over 1..3, running a single iteration
// per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17923 
// Sweeps k over 9..15 with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17938 
// Sweeps k over 9..15, n over 1..4 and m over 1..3, running a single iteration
// per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17958 
// Sweeps k over 16, 24, ..., 80 with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
17973 
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..3, running a single
// iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17993 
// Runs m = 3 with n in [5, 8) (above nr = 4) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18010 
// Same sweep as n_gt_4 (n in [5, 8), k = 1, 10, ..., 37) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18028 
// For n in [5, 8) and k = 1, 10, ..., 37, runs every m in 1..3 with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18048 
// Runs m = 3 with n = 8 and n = 12 (multiples of nr = 4) for
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18065 
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18083 
// For n = 8, 12 and k = 1, 10, ..., 37, runs every m in 1..3 with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18103 
// Runs a full 3x4 tile with ks(3) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18119 
// With ks(3), runs every m in 1..3 and n in 1..4 for k = 1, 10, ..., 37,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18140 
// With ks(3), runs m = 3 and n in [5, 8) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18158 
// With ks(3), runs m = 3 and n = 8, 12 for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18176 
// With cm_stride(7), runs every m in 1..3 and n in 1..4 for
// k = 1, 10, ..., 37, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18197 
// Runs a full 3x4 tile with ks(3) and a_offset(127) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18214 
// With ks(3) and a_offset(127), runs a full 3x4 tile for k = 1, 10, ..., 37
// and each zero_index value in [0, 3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18234 
// Runs a full 3x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18248 
// Runs a full 3x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18262 
// Runs a full 3x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18276 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18277 
18278 
18279 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c2 XOP LD64 kernel on a full 3x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18292 
// Runs a full 3x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18306 
// At k = 8, runs every m in 1..3 and n in 1..4 with a single iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18324 
// At k = 8 and n = 4, runs every m in 1..3 with a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18340 
// At k = 8 and m = 3, runs every n in 1..4 with a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18356 
// Runs a full 3x4 tile for each k in [1, 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18371 
// For each k in [1, 8), runs every m in 1..3 and n in 1..4 with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18391 
// Runs a full 3x4 tile for each k in [9, 16).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18406 
// For each k in [9, 16), runs every m in 1..3 and n in 1..4 with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18426 
// Runs a full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18441 
// For k = 16, 24, ..., 80, runs every m in 1..3 and n in 1..4 with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18461 
// Runs m = 3 with n in [5, 8) (above nr = 4) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18478 
// Same sweep as n_gt_4 (n in [5, 8), k = 1, 10, ..., 37) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18496 
// For n in [5, 8) and k = 1, 10, ..., 37, runs every m in 1..3 with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18516 
// Runs m = 3 with n = 8 and n = 12 (multiples of nr = 4) for
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18533 
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18551 
// For n = 8, 12 and k = 1, 10, ..., 37, runs every m in 1..3 with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18571 
// Runs a full 3x4 tile with ks(3) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18587 
// With ks(3), runs every m in 1..3 and n in 1..4 for k = 1, 10, ..., 37,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18608 
// With ks(3), runs m = 3 and n in [5, 8) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18626 
// With ks(3), runs m = 3 and n = 8, 12 for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18644 
// With cm_stride(7), runs every m in 1..3 and n in 1..4 for
// k = 1, 10, ..., 37, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18665 
// Runs a full 3x4 tile with ks(3) and a_offset(127) for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18682 
// With ks(3) and a_offset(127), runs a full 3x4 tile for k = 1, 10, ..., 37
// and each zero_index value in [0, 3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18702 
// Runs a full 3x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18716 
// Runs a full 3x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18730 
// Runs a full 3x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18744 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18745 
18746 
18747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4c2 AVX LD64 kernel on a full 4x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18760 
// Runs a full 4x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
18774 
// At k = 8, runs every m in 1..4 and n in 1..4 with a single iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18792 
// At k = 8 and n = 4, runs every m in 1..4 with a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18808 
// At k = 8 and m = 4, runs every n in 1..4 with a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18824 
// Runs a full 4x4 tile for each k in [1, 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18839 
// For each k in [1, 8), runs every m in 1..4 and n in 1..4 with a single
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18859 
// 4x4c2 AVX LD64 kernel: full 4x4 tile with every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18874 
// 4x4c2 AVX LD64 kernel: every (k, n, m) with k in 9..15, n in 1..4 and
// m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18894 
// 4x4c2 AVX LD64 kernel: full 4x4 tile with k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
18909 
// 4x4c2 AVX LD64 kernel: k = 16, 24, ..., 80 crossed with n in 1..4 and
// m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18929 
// 4x4c2 AVX LD64 kernel: m = 4 with n in 5..7 and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18946 
// 4x4c2 AVX LD64 kernel: m = 4 with n in 5..7 and k = 1, 10, ..., 37,
// using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18964 
// 4x4c2 AVX LD64 kernel: n in 5..7, k = 1, 10, ..., 37 and m in 1..4;
// each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18984 
// 4x4c2 AVX LD64 kernel: m = 4 with n = 8 and 12, and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19001 
// 4x4c2 AVX LD64 kernel: m = 4 with n = 8 and 12, and k = 1, 10, ..., 37,
// using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19019 
// 4x4c2 AVX LD64 kernel: n = 8 and 12, k = 1, 10, ..., 37 and m in 1..4;
// each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19039 
// 4x4c2 AVX LD64 kernel: full 4x4 tile, ks(3), with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19055 
// 4x4c2 AVX LD64 kernel: ks(3) with k = 1, 10, ..., 37, n in 1..4 and
// m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19076 
// 4x4c2 AVX LD64 kernel: m = 4, ks(3), with n in 5..7 and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19094 
// 4x4c2 AVX LD64 kernel: m = 4, ks(3), with n = 8 and 12, and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19112 
// 4x4c2 AVX LD64 kernel: cm_stride(7) with k = 1, 10, ..., 37, n in 1..4
// and m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19133 
// 4x4c2 AVX LD64 kernel: full 4x4 tile, ks(3) and a_offset(163), with
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19150 
// 4x4c2 AVX LD64 kernel: full 4x4 tile, ks(3) and a_offset(163), with
// k = 1, 10, ..., 37 and zero_index swept over 0..3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t mz_val = 0; mz_val < 4; ++mz_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(mz_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19170 
// 4x4c2 AVX LD64 kernel: single 4x4, k = 8 run with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19184 
// 4x4c2 AVX LD64 kernel: single 4x4, k = 8 run with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19198 
// 4x4c2 AVX LD64 kernel: single 4x4, k = 8 run with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19212 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19213 
19214 
19215 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 XOP LD64 kernel: single run with m = 4, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19228 
// 4x4c2 XOP LD64 kernel: single 4x4, k = 8 run with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19242 
// 4x4c2 XOP LD64 kernel: k = 8 with every (n, m) in 1..4 x 1..4; each
// configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19260 
// 4x4c2 XOP LD64 kernel: n = 4 and k = 8 are fixed while m is swept over
// 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19276 
// 4x4c2 XOP LD64 kernel: m = 4 and k = 8 are fixed while n is swept over
// 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19292 
// 4x4c2 XOP LD64 kernel: full 4x4 tile with every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19307 
// 4x4c2 XOP LD64 kernel: every (k, n, m) with k in 1..7, n in 1..4 and
// m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19327 
// 4x4c2 XOP LD64 kernel: full 4x4 tile with every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19342 
// 4x4c2 XOP LD64 kernel: every (k, n, m) with k in 9..15, n in 1..4 and
// m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19362 
// 4x4c2 XOP LD64 kernel: full 4x4 tile with k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19377 
// 4x4c2 XOP LD64 kernel: k = 16, 24, ..., 80 crossed with n in 1..4 and
// m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19397 
// 4x4c2 XOP LD64 kernel: m = 4 with n in 5..7 and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19414 
// 4x4c2 XOP LD64 kernel: m = 4 with n in 5..7 and k = 1, 10, ..., 37,
// using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19432 
// 4x4c2 XOP LD64 kernel: n in 5..7, k = 1, 10, ..., 37 and m in 1..4;
// each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19452 
// 4x4c2 XOP LD64 kernel: m = 4 with n = 8 and 12, and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19469 
// 4x4c2 XOP LD64 kernel: m = 4 with n = 8 and 12, and k = 1, 10, ..., 37,
// using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19487 
// 4x4c2 XOP LD64 kernel: n = 8 and 12, k = 1, 10, ..., 37 and m in 1..4;
// each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19507 
// 4x4c2 XOP LD64 kernel: full 4x4 tile, ks(3), with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19523 
// 4x4c2 XOP LD64 kernel: ks(3) with k = 1, 10, ..., 37, n in 1..4 and
// m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19544 
// 4x4c2 XOP LD64 kernel: m = 4, ks(3), with n in 5..7 and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19562 
// 4x4c2 XOP LD64 kernel: m = 4, ks(3), with n = 8 and 12, and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19580 
// 4x4c2 XOP LD64 kernel: cm_stride(7) with k = 1, 10, ..., 37, n in 1..4
// and m in 1..4; each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19601 
// 4x4c2 XOP LD64 kernel: full 4x4 tile, ks(3) and a_offset(163), with
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19618 
// 4x4c2 XOP LD64 kernel: full 4x4 tile, ks(3) and a_offset(163), with
// k = 1, 10, ..., 37 and zero_index swept over 0..3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t mz_val = 0; mz_val < 4; ++mz_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(mz_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19638 
// 4x4c2 XOP LD64 kernel: single 4x4, k = 8 run with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19652 
// 4x4c2 XOP LD64 kernel: single 4x4, k = 8 run with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19666 
// 4x4c2 XOP LD64 kernel: single 4x4, k = 8 run with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19680 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19681 
19682 
19683 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 SSE2 LD128 kernel: single run with m = 1, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
19696 
// 1x4c2 SSE2 LD128 kernel: single 1x4, k = 8 run with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
19710 
// 1x4c2 SSE2 LD128 kernel: k = 8 with n in 1..4 and m in 1..1 (the m loop
// runs once); each configuration runs a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19728 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,k_eq_8_subtile_m)19729   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
19730     TEST_REQUIRES_X86_SSE2;
19731     for (uint32_t m = 1; m <= 1; m++) {
19732       GemmMicrokernelTester()
19733         .mr(1)
19734         .nr(4)
19735         .kr(2)
19736         .sr(1)
19737         .m(m)
19738         .n(4)
19739         .k(8)
19740         .iterations(1)
19741         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19742     }
19743   }
19744 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,k_eq_8_subtile_n)19745   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
19746     TEST_REQUIRES_X86_SSE2;
19747     for (uint32_t n = 1; n <= 4; n++) {
19748       GemmMicrokernelTester()
19749         .mr(1)
19750         .nr(4)
19751         .kr(2)
19752         .sr(1)
19753         .m(1)
19754         .n(n)
19755         .k(8)
19756         .iterations(1)
19757         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19758     }
19759   }
19760 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,k_lt_8)19761   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
19762     TEST_REQUIRES_X86_SSE2;
19763     for (size_t k = 1; k < 8; k++) {
19764       GemmMicrokernelTester()
19765         .mr(1)
19766         .nr(4)
19767         .kr(2)
19768         .sr(1)
19769         .m(1)
19770         .n(4)
19771         .k(k)
19772         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19773     }
19774   }
19775 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,k_lt_8_subtile)19776   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
19777     TEST_REQUIRES_X86_SSE2;
19778     for (size_t k = 1; k < 8; k++) {
19779       for (uint32_t n = 1; n <= 4; n++) {
19780         for (uint32_t m = 1; m <= 1; m++) {
19781           GemmMicrokernelTester()
19782             .mr(1)
19783             .nr(4)
19784             .kr(2)
19785             .sr(1)
19786             .m(m)
19787             .n(n)
19788             .k(k)
19789             .iterations(1)
19790             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19791         }
19792       }
19793     }
19794   }
19795 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,k_gt_8)19796   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
19797     TEST_REQUIRES_X86_SSE2;
19798     for (size_t k = 9; k < 16; k++) {
19799       GemmMicrokernelTester()
19800         .mr(1)
19801         .nr(4)
19802         .kr(2)
19803         .sr(1)
19804         .m(1)
19805         .n(4)
19806         .k(k)
19807         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19808     }
19809   }
19810 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,k_gt_8_subtile)19811   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
19812     TEST_REQUIRES_X86_SSE2;
19813     for (size_t k = 9; k < 16; k++) {
19814       for (uint32_t n = 1; n <= 4; n++) {
19815         for (uint32_t m = 1; m <= 1; m++) {
19816           GemmMicrokernelTester()
19817             .mr(1)
19818             .nr(4)
19819             .kr(2)
19820             .sr(1)
19821             .m(m)
19822             .n(n)
19823             .k(k)
19824             .iterations(1)
19825             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19826         }
19827       }
19828     }
19829   }
19830 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,k_div_8)19831   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
19832     TEST_REQUIRES_X86_SSE2;
19833     for (size_t k = 16; k <= 80; k += 8) {
19834       GemmMicrokernelTester()
19835         .mr(1)
19836         .nr(4)
19837         .kr(2)
19838         .sr(1)
19839         .m(1)
19840         .n(4)
19841         .k(k)
19842         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19843     }
19844   }
19845 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,k_div_8_subtile)19846   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
19847     TEST_REQUIRES_X86_SSE2;
19848     for (size_t k = 16; k <= 80; k += 8) {
19849       for (uint32_t n = 1; n <= 4; n++) {
19850         for (uint32_t m = 1; m <= 1; m++) {
19851           GemmMicrokernelTester()
19852             .mr(1)
19853             .nr(4)
19854             .kr(2)
19855             .sr(1)
19856             .m(m)
19857             .n(n)
19858             .k(k)
19859             .iterations(1)
19860             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19861         }
19862       }
19863     }
19864   }
19865 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,n_gt_4)19866   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
19867     TEST_REQUIRES_X86_SSE2;
19868     for (uint32_t n = 5; n < 8; n++) {
19869       for (size_t k = 1; k <= 40; k += 9) {
19870         GemmMicrokernelTester()
19871           .mr(1)
19872           .nr(4)
19873           .kr(2)
19874           .sr(1)
19875           .m(1)
19876           .n(n)
19877           .k(k)
19878           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19879       }
19880     }
19881   }
19882 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,n_gt_4_strided_cn)19883   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
19884     TEST_REQUIRES_X86_SSE2;
19885     for (uint32_t n = 5; n < 8; n++) {
19886       for (size_t k = 1; k <= 40; k += 9) {
19887         GemmMicrokernelTester()
19888           .mr(1)
19889           .nr(4)
19890           .kr(2)
19891           .sr(1)
19892           .m(1)
19893           .n(n)
19894           .k(k)
19895           .cn_stride(7)
19896           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19897       }
19898     }
19899   }
19900 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,n_gt_4_subtile)19901   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
19902     TEST_REQUIRES_X86_SSE2;
19903     for (uint32_t n = 5; n < 8; n++) {
19904       for (size_t k = 1; k <= 40; k += 9) {
19905         for (uint32_t m = 1; m <= 1; m++) {
19906           GemmMicrokernelTester()
19907             .mr(1)
19908             .nr(4)
19909             .kr(2)
19910             .sr(1)
19911             .m(m)
19912             .n(n)
19913             .k(k)
19914             .iterations(1)
19915             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19916         }
19917       }
19918     }
19919   }
19920 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,n_div_4)19921   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
19922     TEST_REQUIRES_X86_SSE2;
19923     for (uint32_t n = 8; n <= 12; n += 4) {
19924       for (size_t k = 1; k <= 40; k += 9) {
19925         GemmMicrokernelTester()
19926           .mr(1)
19927           .nr(4)
19928           .kr(2)
19929           .sr(1)
19930           .m(1)
19931           .n(n)
19932           .k(k)
19933           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19934       }
19935     }
19936   }
19937 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,n_div_4_strided_cn)19938   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
19939     TEST_REQUIRES_X86_SSE2;
19940     for (uint32_t n = 8; n <= 12; n += 4) {
19941       for (size_t k = 1; k <= 40; k += 9) {
19942         GemmMicrokernelTester()
19943           .mr(1)
19944           .nr(4)
19945           .kr(2)
19946           .sr(1)
19947           .m(1)
19948           .n(n)
19949           .k(k)
19950           .cn_stride(7)
19951           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19952       }
19953     }
19954   }
19955 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,n_div_4_subtile)19956   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
19957     TEST_REQUIRES_X86_SSE2;
19958     for (uint32_t n = 8; n <= 12; n += 4) {
19959       for (size_t k = 1; k <= 40; k += 9) {
19960         for (uint32_t m = 1; m <= 1; m++) {
19961           GemmMicrokernelTester()
19962             .mr(1)
19963             .nr(4)
19964             .kr(2)
19965             .sr(1)
19966             .m(m)
19967             .n(n)
19968             .k(k)
19969             .iterations(1)
19970             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19971         }
19972       }
19973     }
19974   }
19975 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,small_kernel)19976   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel) {
19977     TEST_REQUIRES_X86_SSE2;
19978     for (size_t k = 1; k <= 40; k += 9) {
19979       GemmMicrokernelTester()
19980         .mr(1)
19981         .nr(4)
19982         .kr(2)
19983         .sr(1)
19984         .m(1)
19985         .n(4)
19986         .k(k)
19987         .ks(3)
19988         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
19989     }
19990   }
19991 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,small_kernel_subtile)19992   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel_subtile) {
19993     TEST_REQUIRES_X86_SSE2;
19994     for (size_t k = 1; k <= 40; k += 9) {
19995       for (uint32_t n = 1; n <= 4; n++) {
19996         for (uint32_t m = 1; m <= 1; m++) {
19997           GemmMicrokernelTester()
19998             .mr(1)
19999             .nr(4)
20000             .kr(2)
20001             .sr(1)
20002             .m(m)
20003             .n(n)
20004             .k(k)
20005             .ks(3)
20006             .iterations(1)
20007             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20008         }
20009       }
20010     }
20011   }
20012 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,n_gt_4_small_kernel)20013   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
20014     TEST_REQUIRES_X86_SSE2;
20015     for (uint32_t n = 5; n < 8; n++) {
20016       for (size_t k = 1; k <= 40; k += 9) {
20017         GemmMicrokernelTester()
20018           .mr(1)
20019           .nr(4)
20020           .kr(2)
20021           .sr(1)
20022           .m(1)
20023           .n(n)
20024           .k(k)
20025           .ks(3)
20026           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20027       }
20028     }
20029   }
20030 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,n_div_4_small_kernel)20031   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
20032     TEST_REQUIRES_X86_SSE2;
20033     for (uint32_t n = 8; n <= 12; n += 4) {
20034       for (size_t k = 1; k <= 40; k += 9) {
20035         GemmMicrokernelTester()
20036           .mr(1)
20037           .nr(4)
20038           .kr(2)
20039           .sr(1)
20040           .m(1)
20041           .n(n)
20042           .k(k)
20043           .ks(3)
20044           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20045       }
20046     }
20047   }
20048 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,strided_cm_subtile)20049   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
20050     TEST_REQUIRES_X86_SSE2;
20051     for (size_t k = 1; k <= 40; k += 9) {
20052       for (uint32_t n = 1; n <= 4; n++) {
20053         for (uint32_t m = 1; m <= 1; m++) {
20054           GemmMicrokernelTester()
20055             .mr(1)
20056             .nr(4)
20057             .kr(2)
20058             .sr(1)
20059             .m(m)
20060             .n(n)
20061             .k(k)
20062             .cm_stride(7)
20063             .iterations(1)
20064             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20065         }
20066       }
20067     }
20068   }
20069 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,a_offset)20070   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, a_offset) {
20071     TEST_REQUIRES_X86_SSE2;
20072     for (size_t k = 1; k <= 40; k += 9) {
20073       GemmMicrokernelTester()
20074         .mr(1)
20075         .nr(4)
20076         .kr(2)
20077         .sr(1)
20078         .m(1)
20079         .n(4)
20080         .k(k)
20081         .ks(3)
20082         .a_offset(43)
20083         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20084     }
20085   }
20086 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,zero)20087   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, zero) {
20088     TEST_REQUIRES_X86_SSE2;
20089     for (size_t k = 1; k <= 40; k += 9) {
20090       for (uint32_t mz = 0; mz < 1; mz++) {
20091         GemmMicrokernelTester()
20092           .mr(1)
20093           .nr(4)
20094           .kr(2)
20095           .sr(1)
20096           .m(1)
20097           .n(4)
20098           .k(k)
20099           .ks(3)
20100           .a_offset(43)
20101           .zero_index(mz)
20102           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20103       }
20104     }
20105   }
20106 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,qmin)20107   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
20108     TEST_REQUIRES_X86_SSE2;
20109     GemmMicrokernelTester()
20110       .mr(1)
20111       .nr(4)
20112       .kr(2)
20113       .sr(1)
20114       .m(1)
20115       .n(4)
20116       .k(8)
20117       .qmin(128)
20118       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20119   }
20120 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,qmax)20121   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
20122     TEST_REQUIRES_X86_SSE2;
20123     GemmMicrokernelTester()
20124       .mr(1)
20125       .nr(4)
20126       .kr(2)
20127       .sr(1)
20128       .m(1)
20129       .n(4)
20130       .k(8)
20131       .qmax(128)
20132       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20133   }
20134 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128,strided_cm)20135   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
20136     TEST_REQUIRES_X86_SSE2;
20137     GemmMicrokernelTester()
20138       .mr(1)
20139       .nr(4)
20140       .kr(2)
20141       .sr(1)
20142       .m(1)
20143       .n(4)
20144       .k(8)
20145       .cm_stride(7)
20146       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
20147   }
20148 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20149 
20150 
20151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_eq_8)20152   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8) {
20153     TEST_REQUIRES_X86_SSE41;
20154     GemmMicrokernelTester()
20155       .mr(1)
20156       .nr(4)
20157       .kr(2)
20158       .sr(1)
20159       .m(1)
20160       .n(4)
20161       .k(8)
20162       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20163   }
20164 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,strided_cn)20165   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cn) {
20166     TEST_REQUIRES_X86_SSE41;
20167     GemmMicrokernelTester()
20168       .mr(1)
20169       .nr(4)
20170       .kr(2)
20171       .sr(1)
20172       .m(1)
20173       .n(4)
20174       .k(8)
20175       .cn_stride(7)
20176       .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20177   }
20178 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_eq_8_subtile)20179   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile) {
20180     TEST_REQUIRES_X86_SSE41;
20181     for (uint32_t n = 1; n <= 4; n++) {
20182       for (uint32_t m = 1; m <= 1; m++) {
20183         GemmMicrokernelTester()
20184           .mr(1)
20185           .nr(4)
20186           .kr(2)
20187           .sr(1)
20188           .m(m)
20189           .n(n)
20190           .k(8)
20191           .iterations(1)
20192           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20193       }
20194     }
20195   }
20196 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_eq_8_subtile_m)20197   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
20198     TEST_REQUIRES_X86_SSE41;
20199     for (uint32_t m = 1; m <= 1; m++) {
20200       GemmMicrokernelTester()
20201         .mr(1)
20202         .nr(4)
20203         .kr(2)
20204         .sr(1)
20205         .m(m)
20206         .n(4)
20207         .k(8)
20208         .iterations(1)
20209         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20210     }
20211   }
20212 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_eq_8_subtile_n)20213   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
20214     TEST_REQUIRES_X86_SSE41;
20215     for (uint32_t n = 1; n <= 4; n++) {
20216       GemmMicrokernelTester()
20217         .mr(1)
20218         .nr(4)
20219         .kr(2)
20220         .sr(1)
20221         .m(1)
20222         .n(n)
20223         .k(8)
20224         .iterations(1)
20225         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20226     }
20227   }
20228 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_lt_8)20229   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8) {
20230     TEST_REQUIRES_X86_SSE41;
20231     for (size_t k = 1; k < 8; k++) {
20232       GemmMicrokernelTester()
20233         .mr(1)
20234         .nr(4)
20235         .kr(2)
20236         .sr(1)
20237         .m(1)
20238         .n(4)
20239         .k(k)
20240         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20241     }
20242   }
20243 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_lt_8_subtile)20244   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_subtile) {
20245     TEST_REQUIRES_X86_SSE41;
20246     for (size_t k = 1; k < 8; k++) {
20247       for (uint32_t n = 1; n <= 4; n++) {
20248         for (uint32_t m = 1; m <= 1; m++) {
20249           GemmMicrokernelTester()
20250             .mr(1)
20251             .nr(4)
20252             .kr(2)
20253             .sr(1)
20254             .m(m)
20255             .n(n)
20256             .k(k)
20257             .iterations(1)
20258             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20259         }
20260       }
20261     }
20262   }
20263 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_gt_8)20264   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8) {
20265     TEST_REQUIRES_X86_SSE41;
20266     for (size_t k = 9; k < 16; k++) {
20267       GemmMicrokernelTester()
20268         .mr(1)
20269         .nr(4)
20270         .kr(2)
20271         .sr(1)
20272         .m(1)
20273         .n(4)
20274         .k(k)
20275         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20276     }
20277   }
20278 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_gt_8_subtile)20279   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_subtile) {
20280     TEST_REQUIRES_X86_SSE41;
20281     for (size_t k = 9; k < 16; k++) {
20282       for (uint32_t n = 1; n <= 4; n++) {
20283         for (uint32_t m = 1; m <= 1; m++) {
20284           GemmMicrokernelTester()
20285             .mr(1)
20286             .nr(4)
20287             .kr(2)
20288             .sr(1)
20289             .m(m)
20290             .n(n)
20291             .k(k)
20292             .iterations(1)
20293             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20294         }
20295       }
20296     }
20297   }
20298 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_div_8)20299   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8) {
20300     TEST_REQUIRES_X86_SSE41;
20301     for (size_t k = 16; k <= 80; k += 8) {
20302       GemmMicrokernelTester()
20303         .mr(1)
20304         .nr(4)
20305         .kr(2)
20306         .sr(1)
20307         .m(1)
20308         .n(4)
20309         .k(k)
20310         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20311     }
20312   }
20313 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,k_div_8_subtile)20314   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_subtile) {
20315     TEST_REQUIRES_X86_SSE41;
20316     for (size_t k = 16; k <= 80; k += 8) {
20317       for (uint32_t n = 1; n <= 4; n++) {
20318         for (uint32_t m = 1; m <= 1; m++) {
20319           GemmMicrokernelTester()
20320             .mr(1)
20321             .nr(4)
20322             .kr(2)
20323             .sr(1)
20324             .m(m)
20325             .n(n)
20326             .k(k)
20327             .iterations(1)
20328             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20329         }
20330       }
20331     }
20332   }
20333 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,n_gt_4)20334   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4) {
20335     TEST_REQUIRES_X86_SSE41;
20336     for (uint32_t n = 5; n < 8; n++) {
20337       for (size_t k = 1; k <= 40; k += 9) {
20338         GemmMicrokernelTester()
20339           .mr(1)
20340           .nr(4)
20341           .kr(2)
20342           .sr(1)
20343           .m(1)
20344           .n(n)
20345           .k(k)
20346           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20347       }
20348     }
20349   }
20350 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,n_gt_4_strided_cn)20351   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
20352     TEST_REQUIRES_X86_SSE41;
20353     for (uint32_t n = 5; n < 8; n++) {
20354       for (size_t k = 1; k <= 40; k += 9) {
20355         GemmMicrokernelTester()
20356           .mr(1)
20357           .nr(4)
20358           .kr(2)
20359           .sr(1)
20360           .m(1)
20361           .n(n)
20362           .k(k)
20363           .cn_stride(7)
20364           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20365       }
20366     }
20367   }
20368 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,n_gt_4_subtile)20369   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_subtile) {
20370     TEST_REQUIRES_X86_SSE41;
20371     for (uint32_t n = 5; n < 8; n++) {
20372       for (size_t k = 1; k <= 40; k += 9) {
20373         for (uint32_t m = 1; m <= 1; m++) {
20374           GemmMicrokernelTester()
20375             .mr(1)
20376             .nr(4)
20377             .kr(2)
20378             .sr(1)
20379             .m(m)
20380             .n(n)
20381             .k(k)
20382             .iterations(1)
20383             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20384         }
20385       }
20386     }
20387   }
20388 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,n_div_4)20389   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4) {
20390     TEST_REQUIRES_X86_SSE41;
20391     for (uint32_t n = 8; n <= 12; n += 4) {
20392       for (size_t k = 1; k <= 40; k += 9) {
20393         GemmMicrokernelTester()
20394           .mr(1)
20395           .nr(4)
20396           .kr(2)
20397           .sr(1)
20398           .m(1)
20399           .n(n)
20400           .k(k)
20401           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20402       }
20403     }
20404   }
20405 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,n_div_4_strided_cn)20406   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
20407     TEST_REQUIRES_X86_SSE41;
20408     for (uint32_t n = 8; n <= 12; n += 4) {
20409       for (size_t k = 1; k <= 40; k += 9) {
20410         GemmMicrokernelTester()
20411           .mr(1)
20412           .nr(4)
20413           .kr(2)
20414           .sr(1)
20415           .m(1)
20416           .n(n)
20417           .k(k)
20418           .cn_stride(7)
20419           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20420       }
20421     }
20422   }
20423 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,n_div_4_subtile)20424   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_subtile) {
20425     TEST_REQUIRES_X86_SSE41;
20426     for (uint32_t n = 8; n <= 12; n += 4) {
20427       for (size_t k = 1; k <= 40; k += 9) {
20428         for (uint32_t m = 1; m <= 1; m++) {
20429           GemmMicrokernelTester()
20430             .mr(1)
20431             .nr(4)
20432             .kr(2)
20433             .sr(1)
20434             .m(m)
20435             .n(n)
20436             .k(k)
20437             .iterations(1)
20438             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20439         }
20440       }
20441     }
20442   }
20443 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,small_kernel)20444   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel) {
20445     TEST_REQUIRES_X86_SSE41;
20446     for (size_t k = 1; k <= 40; k += 9) {
20447       GemmMicrokernelTester()
20448         .mr(1)
20449         .nr(4)
20450         .kr(2)
20451         .sr(1)
20452         .m(1)
20453         .n(4)
20454         .k(k)
20455         .ks(3)
20456         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20457     }
20458   }
20459 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,small_kernel_subtile)20460   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel_subtile) {
20461     TEST_REQUIRES_X86_SSE41;
20462     for (size_t k = 1; k <= 40; k += 9) {
20463       for (uint32_t n = 1; n <= 4; n++) {
20464         for (uint32_t m = 1; m <= 1; m++) {
20465           GemmMicrokernelTester()
20466             .mr(1)
20467             .nr(4)
20468             .kr(2)
20469             .sr(1)
20470             .m(m)
20471             .n(n)
20472             .k(k)
20473             .ks(3)
20474             .iterations(1)
20475             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20476         }
20477       }
20478     }
20479   }
20480 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,n_gt_4_small_kernel)20481   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
20482     TEST_REQUIRES_X86_SSE41;
20483     for (uint32_t n = 5; n < 8; n++) {
20484       for (size_t k = 1; k <= 40; k += 9) {
20485         GemmMicrokernelTester()
20486           .mr(1)
20487           .nr(4)
20488           .kr(2)
20489           .sr(1)
20490           .m(1)
20491           .n(n)
20492           .k(k)
20493           .ks(3)
20494           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20495       }
20496     }
20497   }
20498 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,n_div_4_small_kernel)20499   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
20500     TEST_REQUIRES_X86_SSE41;
20501     for (uint32_t n = 8; n <= 12; n += 4) {
20502       for (size_t k = 1; k <= 40; k += 9) {
20503         GemmMicrokernelTester()
20504           .mr(1)
20505           .nr(4)
20506           .kr(2)
20507           .sr(1)
20508           .m(1)
20509           .n(n)
20510           .k(k)
20511           .ks(3)
20512           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20513       }
20514     }
20515   }
20516 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,strided_cm_subtile)20517   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm_subtile) {
20518     TEST_REQUIRES_X86_SSE41;
20519     for (size_t k = 1; k <= 40; k += 9) {
20520       for (uint32_t n = 1; n <= 4; n++) {
20521         for (uint32_t m = 1; m <= 1; m++) {
20522           GemmMicrokernelTester()
20523             .mr(1)
20524             .nr(4)
20525             .kr(2)
20526             .sr(1)
20527             .m(m)
20528             .n(n)
20529             .k(k)
20530             .cm_stride(7)
20531             .iterations(1)
20532             .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20533         }
20534       }
20535     }
20536   }
20537 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,a_offset)20538   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, a_offset) {
20539     TEST_REQUIRES_X86_SSE41;
20540     for (size_t k = 1; k <= 40; k += 9) {
20541       GemmMicrokernelTester()
20542         .mr(1)
20543         .nr(4)
20544         .kr(2)
20545         .sr(1)
20546         .m(1)
20547         .n(4)
20548         .k(k)
20549         .ks(3)
20550         .a_offset(43)
20551         .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20552     }
20553   }
20554 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128,zero)20555   TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, zero) {
20556     TEST_REQUIRES_X86_SSE41;
20557     for (size_t k = 1; k <= 40; k += 9) {
20558       for (uint32_t mz = 0; mz < 1; mz++) {
20559         GemmMicrokernelTester()
20560           .mr(1)
20561           .nr(4)
20562           .kr(2)
20563           .sr(1)
20564           .m(1)
20565           .n(4)
20566           .k(k)
20567           .ks(3)
20568           .a_offset(43)
20569           .zero_index(mz)
20570           .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
20571       }
20572     }
20573   }
20574 
// qmin case for the 1x4c2 sse41_ld128 kernel: single m=1, n=4, k=8 run with
// the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20588 
// qmax case for the 1x4c2 sse41_ld128 kernel: single m=1, n=4, k=8 run with
// the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20602 
// strided_cm case for the 1x4c2 sse41_ld128 kernel: single m=1, n=4, k=8 run
// with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20616 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20617 
20618 
20619 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case for the 2x4c2 sse2_ld128 kernel: one run at m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
20632 
// strided_cn case for the 2x4c2 sse2_ld128 kernel: m=2, n=4, k=8 with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
20646 
// k=8 on every (m, n) combination with n in 1..4 and m in 1..2, one tester
// iteration per combination (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20664 
// k=8, n=4 with m swept over 1..2, one tester iteration each
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size < 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
20680 
// k=8, m=2 with n swept over 1..4, one tester iteration each
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
20696 
// m=2, n=4 with k swept over 1..7 (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
20711 
// k in 1..7 crossed with n in 1..4 and m in 1..2, one tester iteration per
// combination (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20731 
// m=2, n=4 with k swept over 9..15 (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
20746 
// k in 9..15 crossed with n in 1..4 and m in 1..2, one tester iteration per
// combination (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20766 
// m=2, n=4 with k stepping through multiples of 8 from 16 to 80
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
20781 
// k in 16, 24, ..., 80 crossed with n in 1..4 and m in 1..2, one tester
// iteration per combination (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20801 
// m=2 with n in 5..7, each at k = 1, 10, 19, 28, 37
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20818 
// m=2 with n in 5..7 and k = 1, 10, 19, 28, 37, with cn_stride set to 7
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20836 
// n in 5..7 crossed with k = 1, 10, 19, 28, 37 and m in 1..2, one tester
// iteration per combination (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20856 
// m=2 with n = 8 and 12, each at k = 1, 10, 19, 28, 37
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20873 
// m=2 with n = 8 and 12 and k = 1, 10, 19, 28, 37, with cn_stride set to 7
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20891 
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..2, one tester
// iteration per combination (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20911 
// m=2, n=4 with ks=3, for k = 1, 10, 19, 28, 37 (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
20927 
// ks=3 with k = 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..2, one
// tester iteration per combination (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20948 
// ks=3, m=2 with n in 5..7 and k = 1, 10, 19, 28, 37
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20966 
// ks=3, m=2 with n = 8 and 12 and k = 1, 10, 19, 28, 37
// (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20984 
// cm_stride=7 with k = 1, 10, 19, 28, 37 crossed with n in 1..4 and m in
// 1..2, one tester iteration per combination (2x4c2 sse2_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21005 
// a_offset case for the 2x4c2 sse2_ld128 kernel: ks=3 and a_offset=83 on an
// m=2, n=4 problem, with k taking the values 1, 10, 19, 28 and 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
21022 
// zero_index case for the 2x4c2 sse2_ld128 kernel: ks=3 / a_offset=83 setup
// with zero_index set to 0 and then 1, for every k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21042 
// qmin case for the 2x4c2 sse2_ld128 kernel: single m=2, n=4, k=8 run with
// the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
21056 
// qmax case for the 2x4c2 sse2_ld128 kernel: single m=2, n=4, k=8 run with
// the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
21070 
// strided_cm case for the 2x4c2 sse2_ld128 kernel: single m=2, n=4, k=8 run
// with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
21084 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21085 
21086 
21087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case for the 2x4c2 sse41_ld128 kernel: one run at m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21100 
// strided_cn case for the 2x4c2 sse41_ld128 kernel: m=2, n=4, k=8 with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21114 
// k=8 on every (m, n) combination with n in 1..4 and m in 1..2, one tester
// iteration per combination (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21132 
// k=8, n=4 with m swept over 1..2, one tester iteration each
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size < 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21148 
// k=8, m=2 with n swept over 1..4, one tester iteration each
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21164 
// m=2, n=4 with k swept over 1..7 (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21179 
// k in 1..7 crossed with n in 1..4 and m in 1..2, one tester iteration per
// combination (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21199 
// m=2, n=4 with k swept over 9..15 (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21214 
// k in 9..15 crossed with n in 1..4 and m in 1..2, one tester iteration per
// combination (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21234 
// m=2, n=4 with k stepping through multiples of 8 from 16 to 80
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21249 
// k in 16, 24, ..., 80 crossed with n in 1..4 and m in 1..2, one tester
// iteration per combination (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21269 
// m=2 with n in 5..7, each at k = 1, 10, 19, 28, 37
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21286 
// m=2 with n in 5..7 and k = 1, 10, 19, 28, 37, with cn_stride set to 7
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21304 
// n in 5..7 crossed with k = 1, 10, 19, 28, 37 and m in 1..2, one tester
// iteration per combination (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21324 
// m=2 with n = 8 and 12, each at k = 1, 10, 19, 28, 37
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21341 
// m=2 with n = 8 and 12 and k = 1, 10, 19, 28, 37, with cn_stride set to 7
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21359 
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..2, one tester
// iteration per combination (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21379 
// m=2, n=4 with ks=3, for k = 1, 10, 19, 28, 37 (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21395 
// ks=3 with k = 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..2, one
// tester iteration per combination (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21416 
// ks=3, m=2 with n in 5..7 and k = 1, 10, 19, 28, 37
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21434 
// ks=3, m=2 with n = 8 and 12 and k = 1, 10, 19, 28, 37
// (2x4c2 sse41_ld128 kernel).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21452 
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, 19, 28, 37; every n in [1, 4] and m in [1, 2]; cm_stride(7), one iteration per shape.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21473 
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile; k = 1, 10, 19, 28, 37; ks(3) combined with a_offset(83).
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21490 
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile; k = 1, 10, 19, 28, 37; zero_index takes each value in [0, 2).
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21510 
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile at k = 8 with qmin(128).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21524 
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile at k = 8 with qmax(128).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21538 
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile at k = 8 with cm_stride(7).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21552 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21553 
21554 
21555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single run on the full 4x4 tile with k = 8.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21568 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile at k = 8 with cn_stride(7).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21582 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every n in [1, 4] and m in [1, 4] at k = 8; one iteration per shape.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21600 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // m from 1 to 4 with n fixed at 4 and k = 8; one iteration per shape.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21616 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // n from 1 to 4 with m fixed at 4 and k = 8; one iteration per shape.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21632 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for each k from 1 to 7.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21647 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k from 1 to 7; every n in [1, 4] and m in [1, 4]; one iteration per shape.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21667 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for each k from 9 to 15.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21682 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k from 9 to 15; every n in [1, 4] and m in [1, 4]; one iteration per shape.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21702 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21717 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80; every n in [1, 4] and m in [1, 4]; one iteration per shape.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21737 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 (above nr = 4); k = 1, 10, 19, 28, 37; m fixed at 4.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21754 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7; k = 1, 10, 19, 28, 37; m fixed at 4; cn_stride(7).
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21772 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7; k = 1, 10, 19, 28, 37; every m in [1, 4]; one iteration per shape.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21792 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 (multiples of nr = 4); k = 1, 10, 19, 28, 37; m fixed at 4.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21809 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12; k = 1, 10, 19, 28, 37; m fixed at 4; cn_stride(7).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21827 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12; k = 1, 10, 19, 28, 37; every m in [1, 4]; one iteration per shape.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21847 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile; k = 1, 10, 19, 28, 37; ks(3).
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21863 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, 19, 28, 37; every n in [1, 4] and m in [1, 4]; ks(3), one iteration per shape.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21884 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 (above nr = 4); k = 1, 10, 19, 28, 37; ks(3).
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21902 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 (multiples of nr = 4); k = 1, 10, 19, 28, 37; ks(3).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21920 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, 19, 28, 37; every n in [1, 4] and m in [1, 4]; cm_stride(7), one iteration per shape.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21941 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile; k = 1, 10, 19, 28, 37; ks(3) combined with a_offset(163).
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21958 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile; k = 1, 10, 19, 28, 37; zero_index takes each value in [0, 4).
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21978 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile at k = 8 with qmin(128).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21992 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile at k = 8 with qmax(128).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22006 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile at k = 8 with cm_stride(7).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22020 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22021 
22022 
22023 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single run on the full 1x4 tile with k = 8.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22036 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Full 1x4 tile at k = 8 with cn_stride(7).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22050 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every n in [1, 4] at k = 8; the m loop covers only m = 1. One iteration per shape.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22068 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // The m loop covers only m = 1; n fixed at 4 and k = 8; one iteration.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22084 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // n from 1 to 4 with m fixed at 1 and k = 8; one iteration per shape.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22100 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 1x4 tile for each k from 1 to 7.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22115 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k from 1 to 7; every n in [1, 4]; the m loop covers only m = 1. One iteration per shape.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22135 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 1x4 tile for each k from 9 to 15.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22150 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k from 9 to 15; every n in [1, 4]; the m loop covers only m = 1. One iteration per shape.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22170 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22185 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 16, 24, ..., 80; every n in [1, 4]; the m loop covers only m = 1. One iteration per shape.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22205 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 5, 6, 7 (above nr = 4); k = 1, 10, 19, 28, 37; m fixed at 1.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22222 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n = 5, 6, 7; k = 1, 10, 19, 28, 37; m fixed at 1; cn_stride(7).
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22240 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 5, 6, 7; k = 1, 10, 19, 28, 37; the m loop covers only m = 1. One iteration per shape.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22260 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 (multiples of nr = 4); k = 1, 10, 19, 28, 37; m fixed at 1.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22277 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12; k = 1, 10, 19, 28, 37; m fixed at 1; cn_stride(7).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22295 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12; k = 1, 10, 19, 28, 37; the m loop covers only m = 1. One iteration per shape.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22315 
// Full 1x4 tile with ks = 3, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22331 
// ks = 3 with every (m, n) in 1..1 x 1..4 and k over 1, 10, 19, 28, 37, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22352 
// ks = 3 with n over 5, 6, 7 (above nr = 4) and k over 1, 10, 19, 28, 37; m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22370 
// ks = 3 with n over 8 and 12 (multiples of nr = 4) and k over 1, 10, 19, 28, 37; m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22388 
// cm_stride = 7 with every (m, n) in 1..1 x 1..4 and k over 1, 10, 19, 28, 37, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22409 
// Full 1x4 tile with ks = 3 and a_offset = 43, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22426 
// ks = 3, a_offset = 43, with zero_index taking each value in [0, mr) (only 0 here) for k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22446 
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22460 
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22474 
// Full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22488 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22489 
22490 
22491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22504 
// Full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22518 
// k = 8 with every (m, n) in 1..2 x 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22536 
// k = 8, n = 4, with m over 1..2, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22552 
// k = 8, m = 2, with n over 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22568 
// Full 2x4 tile with k over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22583 
// k over 1..7 with every (m, n) in 1..2 x 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22603 
// Full 2x4 tile with k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22618 
// k over 9..15 with every (m, n) in 1..2 x 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22638 
// Full 2x4 tile with k over the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22653 
// k over the multiples of 8 from 16 to 80 with every (m, n) in 1..2 x 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22673 
// n over 5, 6, 7 (above nr = 4) and k over 1, 10, 19, 28, 37 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22690 
// n over 5, 6, 7 and k over 1, 10, 19, 28, 37 with m = 2 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22708 
// n over 5, 6, 7, k over 1, 10, 19, 28, 37 and m over 1..2, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22728 
// n over 8 and 12 (multiples of nr = 4) and k over 1, 10, 19, 28, 37 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22745 
// n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 2 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22763 
// n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..2, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22783 
// Full 2x4 tile with ks = 3, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22799 
// ks = 3 with every (m, n) in 1..2 x 1..4 and k over 1, 10, 19, 28, 37, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22820 
// ks = 3 with n over 5, 6, 7 (above nr = 4) and k over 1, 10, 19, 28, 37; m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22838 
// ks = 3 with n over 8 and 12 (multiples of nr = 4) and k over 1, 10, 19, 28, 37; m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22856 
// cm_stride = 7 with every (m, n) in 1..2 x 1..4 and k over 1, 10, 19, 28, 37, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22877 
// Full 2x4 tile with ks = 3 and a_offset = 83, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22894 
// ks = 3, a_offset = 83, with zero_index taking each value in [0, mr) = {0, 1} for k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22914 
// Full 2x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22928 
// Full 2x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22942 
// Full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22956 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22957 
22958 
22959 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22972 
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22986 
// k = 8 with every (m, n) in 1..3 x 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23004 
// k = 8, n = 4, with m over 1..3, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23020 
// k = 8, m = 3, with n over 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23036 
// Full 3x4 tile with k over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23051 
// k over 1..7 with every (m, n) in 1..3 x 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071 
// Full 3x4 tile with k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23086 
// k over 9..15 with every (m, n) in 1..3 x 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23106 
// Full 3x4 tile with k over the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23121 
// k over the multiples of 8 from 16 to 80 with every (m, n) in 1..3 x 1..4, one iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23141 
// n over 5, 6, 7 (above nr = 4) and k over 1, 10, 19, 28, 37 with m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23158 
// n over 5, 6, 7 and k over 1, 10, 19, 28, 37 with m = 3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23176 
// 3x4c2 AVX LD128 kernel: n = 5..7 and k = 1..40 (step 9) across every
// m in 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23196 
// 3x4c2 AVX LD128 kernel: m = 3 with n = 8 and 12 (multiples of nr = 4),
// for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23213 
// 3x4c2 AVX LD128 kernel: m = 3, n = 8 and 12, k = 1..40 in steps of 9,
// with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23231 
// 3x4c2 AVX LD128 kernel: n = 8 and 12, k = 1..40 (step 9), across every
// m in 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23251 
// 3x4c2 AVX LD128 kernel: full 3x4 tile with ks = 3, for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23267 
// 3x4c2 AVX LD128 kernel: ks = 3 with k = 1..40 (step 9) across every
// m in 1..3 and n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23288 
// 3x4c2 AVX LD128 kernel: ks = 3, m = 3, n = 5..7, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23306 
// 3x4c2 AVX LD128 kernel: ks = 3, m = 3, n = 8 and 12, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23324 
// 3x4c2 AVX LD128 kernel: cm_stride = 7 with k = 1..40 (step 9) across every
// m in 1..3 and n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23345 
// 3x4c2 AVX LD128 kernel: full 3x4 tile with ks = 3 and a_offset = 127,
// for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23362 
// 3x4c2 AVX LD128 kernel: ks = 3 and a_offset = 127, with zero_index set to
// each mz in 0..2, for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23382 
// 3x4c2 AVX LD128 kernel: single 3x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23396 
// 3x4c2 AVX LD128 kernel: single 3x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23410 
// 3x4c2 AVX LD128 kernel: single 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23424 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23425 
23426 
23427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 XOP LD128 kernel: single full 4x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23440 
// 4x4c2 XOP LD128 kernel: single full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23454 
// 4x4c2 XOP LD128 kernel: k = 8 across every m in 1..4 and n in 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23472 
// 4x4c2 XOP LD128 kernel: k = 8 and n = 4 with m varied over 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23488 
// 4x4c2 XOP LD128 kernel: k = 8 and m = 4 with n varied over 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23504 
// 4x4c2 XOP LD128 kernel: full 4x4 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23519 
// 4x4c2 XOP LD128 kernel: k = 1..7 across every m in 1..4 and n in 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23539 
// 4x4c2 XOP LD128 kernel: full 4x4 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23554 
// 4x4c2 XOP LD128 kernel: k = 9..15 across every m in 1..4 and n in 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23574 
// 4x4c2 XOP LD128 kernel: full 4x4 tile for k = 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23589 
// 4x4c2 XOP LD128 kernel: k = 16..80 (step 8) across every m in 1..4 and
// n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23609 
// 4x4c2 XOP LD128 kernel: m = 4 with n = 5..7 (above nr = 4),
// for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23626 
// 4x4c2 XOP LD128 kernel: m = 4, n = 5..7, k = 1..40 in steps of 9,
// with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23644 
// 4x4c2 XOP LD128 kernel: n = 5..7 and k = 1..40 (step 9) across every
// m in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23664 
// 4x4c2 XOP LD128 kernel: m = 4 with n = 8 and 12 (multiples of nr = 4),
// for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23681 
// 4x4c2 XOP LD128 kernel: m = 4, n = 8 and 12, k = 1..40 in steps of 9,
// with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23699 
// 4x4c2 XOP LD128 kernel: n = 8 and 12, k = 1..40 (step 9), across every
// m in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23719 
// 4x4c2 XOP LD128 kernel: full 4x4 tile with ks = 3, for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23735 
// 4x4c2 XOP LD128 kernel: ks = 3 with k = 1..40 (step 9) across every
// m in 1..4 and n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23756 
// 4x4c2 XOP LD128 kernel: ks = 3, m = 4, n = 5..7, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23774 
// 4x4c2 XOP LD128 kernel: ks = 3, m = 4, n = 8 and 12, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23792 
// 4x4c2 XOP LD128 kernel: cm_stride = 7 with k = 1..40 (step 9) across every
// m in 1..4 and n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23813 
// 4x4c2 XOP LD128 kernel: full 4x4 tile with ks = 3 and a_offset = 163,
// for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23830 
// 4x4c2 XOP LD128 kernel: ks = 3 and a_offset = 163, with zero_index set to
// each mz in 0..3, for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23850 
// 4x4c2 XOP LD128 kernel: single 4x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23864 
// 4x4c2 XOP LD128 kernel: single 4x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23878 
// 4x4c2 XOP LD128 kernel: single 4x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23892 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23893 
23894 
23895 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2s4 SSE2 LD64 kernel: single full 1x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
23908 
// 1x4c2s4 SSE2 LD64 kernel: single full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
23922 
// 1x4c2s4 SSE2 LD64 kernel: k = 8 across n in 1..4 and m in 1..1 (mr is 1, so
// the m loop runs once), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
23940 
// 1x4c2s4 SSE2 LD64 kernel: k = 8 and n = 4 with m looped over 1..1
// (a single pass), one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
23956 
// 1x4c2s4 SSE2 LD64 kernel: k = 8 and m = 1 with n varied over 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
23972 
// 1x4c2s4 SSE2 LD64 kernel: full 1x4 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
23987 
// 1x4c2s4 SSE2 LD64 kernel: k = 1..7 across n in 1..4 and m in 1..1,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24007 
// 1x4c2s4 SSE2 LD64 kernel: full 1x4 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
24022 
// 1x4c2s4 SSE2 LD64 kernel: k = 9..15 across n in 1..4 and m in 1..1,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24042 
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24057 
// Sweeps k over 16, 24, ..., 80 (step 8), n over 1..4 and m over 1..1,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24077 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24094 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24112 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..1,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24132 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24149 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24167 
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..1,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24187 
// Sweeps k over 1, 10, 19, 28, 37 with m = 1, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24203 
// Sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..1 with ks set to 3,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24224 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24242 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24260 
// Sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..1 with cm_stride set to 7,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24281 
// Sweeps k over 1, 10, 19, 28, 37 with m = 1, n = 4, ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24298 
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..0 with m = 1, n = 4,
// ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24318 
// Single configuration: m = 1, n = 4, k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24332 
// Single configuration: m = 1, n = 4, k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24346 
// Single configuration: m = 1, n = 4, k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24360 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24361 
24362 
24363 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m = 2, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24376 
// Single configuration: m = 2, n = 4, k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24390 
// Sweeps n over 1..4 and m over 1..2 at k = 8, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24408 
// Sweeps m over 1..2 with n = 4 and k = 8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24424 
// Sweeps n over 1..4 with m = 2 and k = 8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24440 
// Sweeps k over 1..7 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24455 
// Sweeps k over 1..7, n over 1..4 and m over 1..2, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24475 
// Sweeps k over 9..15 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24490 
// Sweeps k over 9..15, n over 1..4 and m over 1..2, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24510 
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24525 
// Sweeps k over 16, 24, ..., 80 (step 8), n over 1..4 and m over 1..2,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24545 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24562 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 2 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24580 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..2,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24600 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24617 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 2 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24635 
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..2,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24655 
// Sweeps k over 1, 10, 19, 28, 37 with m = 2, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24671 
// Sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2 with ks set to 3,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24692 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24710 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24728 
// Sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2 with cm_stride set to 7,
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24749 
// Sweeps k over 1, 10, 19, 28, 37 with m = 2, n = 4, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24766 
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..1 with m = 2, n = 4,
// ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24786 
// Single configuration: m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24800 
// Single configuration: m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24814 
// Single configuration: m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24828 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24829 
24830 
24831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m = 2, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24844 
// Single configuration: m = 2, n = 4, k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24858 
// Sweeps n over 1..4 and m over 1..2 at k = 8, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24876 
// Sweeps m over 1..2 with n = 4 and k = 8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24892 
// Sweeps n over 1..4 with m = 2 and k = 8, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24908 
// k_lt_8: full 2x4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24923 
// k_lt_8_subtile: for every k in [1, 7], sweeps n in [1, 4] and m in [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24943 
// k_gt_8: full 2x4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24958 
// k_gt_8_subtile: for every k in [9, 15], sweeps n in [1, 4] and m in [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24978 
// k_div_8: full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24993 
// k_div_8_subtile: for k = 16, 24, ..., 80, sweeps n in [1, 4] and m in [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25013 
// n_gt_4: m = 2 with n in [5, 7] (above nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25030 
// n_gt_4_strided_cn: m = 2 with n in [5, 7] and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25048 
// n_gt_4_subtile: n in [5, 7], k = 1, 10, 19, 28, 37 and m in [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25068 
// n_div_4: m = 2 with n = 8 and 12 (multiples of nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25085 
// n_div_4_strided_cn: m = 2 with n = 8 and 12 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25103 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and m in [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25123 
// small_kernel: full 2x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25139 
// small_kernel_subtile: ks(3) with k = 1, 10, 19, 28, 37, n in [1, 4] and
// m in [1, 2], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25160 
// n_gt_4_small_kernel: ks(3) with m = 2 and n in [5, 7], for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25178 
// n_div_4_small_kernel: ks(3) with m = 2 and n = 8 and 12, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25196 
// strided_cm_subtile: cm_stride(7) with k = 1, 10, 19, 28, 37, n in [1, 4] and
// m in [1, 2], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25217 
// a_offset: full 2x4 tile with ks(3) and a_offset(83), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25234 
// zero: full 2x4 tile with ks(3) and a_offset(83); for k = 1, 10, 19, 28, 37,
// sets zero_index to each mz in [0, 1] in turn.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25254 
// qmin: full 2x4 tile at k = 8 with qmin(128) configured on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25268 
// qmax: full 2x4 tile at k = 8 with qmax(128) configured on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25282 
// strided_cm: full 2x4 tile at k = 8 with cm_stride(7) configured on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25296 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25297 
25298 
25299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 3x4 tile at k = 8 with no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25312 
// strided_cn: full 3x4 tile at k = 8 with cn_stride(7) configured on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25326 
// k_eq_8_subtile: at k = 8, sweeps every n in [1, 4] and m in [1, 3],
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25344 
// k_eq_8_subtile_m: at k = 8 and n = 4, sweeps m over [1, 3], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25360 
// k_eq_8_subtile_n: at k = 8 and m = 3, sweeps n over [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25376 
// k_lt_8: full 3x4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25391 
// k_lt_8_subtile: for every k in [1, 7], sweeps n in [1, 4] and m in [1, 3],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25411 
// k_gt_8: full 3x4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25426 
// k_gt_8_subtile: for every k in [9, 15], sweeps n in [1, 4] and m in [1, 3],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25446 
// k_div_8: full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25461 
// k_div_8_subtile: for k = 16, 24, ..., 80, sweeps n in [1, 4] and m in [1, 3],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25481 
// n_gt_4: m = 3 with n in [5, 7] (above nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25498 
// n_gt_4_strided_cn: m = 3 with n in [5, 7] and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25516 
// n_gt_4_subtile: n in [5, 7], k = 1, 10, 19, 28, 37 and m in [1, 3],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25536 
// n_div_4: m = 3 with n = 8 and 12 (multiples of nr = 4), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25553 
// n_div_4_strided_cn: m = 3 with n = 8 and 12 and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25571 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and m in [1, 3],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25591 
// small_kernel: full 3x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25607 
// small_kernel_subtile: ks(3) with k = 1, 10, 19, 28, 37, n in [1, 4] and
// m in [1, 3], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25628 
// n_gt_4_small_kernel: ks(3) with m = 3 and n in [5, 7], for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25646 
// n_div_4_small_kernel: ks(3) with m = 3 and n = 8 and 12, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25664 
// strided_cm_subtile: cm_stride(7) with k = 1, 10, 19, 28, 37, n in [1, 4] and
// m in [1, 3], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25685 
// a_offset: full 3x4 tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25702 
// zero: full 3x4 tile with ks(3) and a_offset(127); for k = 1, 10, 19, 28, 37,
// sets zero_index to each mz in [0, 2] in turn.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25722 
// qmin: full 3x4 tile at k = 8 with qmin(128) configured on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25736 
// qmax: full 3x4 tile at k = 8 with qmax(128) configured on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25750 
// strided_cm: full 3x4 tile at k = 8 with cm_stride(7) configured on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25764 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25765 
25766 
25767 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile with k = 8; every other tester setting is left at its default.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25780 
// Full 3x4 tile with k = 8 and cn_stride set to 7 (larger than nr = 4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25794 
// k = 8 for every output size m in [1, 3] x n in [1, 4]; one iteration per size.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25812 
// k = 8 with n fixed at 4 and m swept over [1, 3]; one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25828 
// k = 8 with m fixed at 3 and n swept over [1, 4]; one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25844 
// Full 3x4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25859 
// Every k in [1, 7] combined with every m in [1, 3] and n in [1, 4];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25879 
// Full 3x4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25894 
// Every k in [9, 15] combined with every m in [1, 3] and n in [1, 4];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25914 
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25929 
// k = 16, 24, ..., 80 combined with every m in [1, 3] and n in [1, 4];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25949 
// m = 3 with n in [5, 7] (wider than nr = 4) and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25966 
// m = 3 with n in [5, 7], k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25984 
// n in [5, 7] and k in {1, 10, 19, 28, 37}, each with every m in [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26004 
// m = 3 with n in {8, 12} (multiples of nr = 4) and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26021 
// m = 3 with n in {8, 12}, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26039 
// n in {8, 12} and k in {1, 10, 19, 28, 37}, each with every m in [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26059 
// Full 3x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26075 
// ks = 3 with k in {1, 10, 19, 28, 37}, every m in [1, 3] and n in [1, 4];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26096 
// ks = 3 with m = 3, n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26114 
// ks = 3 with m = 3, n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26132 
// cm_stride = 7 with k in {1, 10, 19, 28, 37}, every m in [1, 3] and
// n in [1, 4]; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26153 
// Full 3x4 tile with ks = 3 and a_offset = 127, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26170 
// Full 3x4 tile with ks = 3 and a_offset = 127, for k in {1, 10, 19, 28, 37}
// and zero_index set to each of the rows 0..2 in turn.
// NOTE(review): zero_index semantics live in gemm-microkernel-tester.h -- confirm there.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26190 
// Full 3x4 tile with k = 8 and the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26204 
// Full 3x4 tile with k = 8 and the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26218 
// Full 3x4 tile with k = 8 and cm_stride set to 7 (larger than n = 4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26232 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26233 
26234 
26235 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile with k = 8; every other tester setting is left at its default.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26248 
// Full 3x4 tile with k = 8 and cn_stride set to 7 (larger than nr = 4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26262 
// k = 8 for every output size m in [1, 3] x n in [1, 4]; one iteration per size.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26280 
// k = 8 with n fixed at 4 and m swept over [1, 3]; one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26296 
// k = 8 with m fixed at 3 and n swept over [1, 4]; one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26312 
// Full 3x4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26327 
// Every k in [1, 7] combined with every m in [1, 3] and n in [1, 4];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26347 
// Full 3x4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26362 
// Every k in [9, 15] combined with every m in [1, 3] and n in [1, 4];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26382 
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26397 
// k = 16, 24, ..., 80 combined with every m in [1, 3] and n in [1, 4];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26417 
// m = 3 with n in [5, 7] (wider than nr = 4) and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26434 
// m = 3 with n in [5, 7], k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26452 
// n in [5, 7] and k in {1, 10, 19, 28, 37}, each with every m in [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26472 
// m = 3 with n in {8, 12} (multiples of nr = 4) and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26489 
// m = 3 with n in {8, 12}, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26507 
// n in {8, 12} and k in {1, 10, 19, 28, 37}, each with every m in [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26527 
// Full 3x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26543 
// ks = 3 with k in {1, 10, 19, 28, 37}, every m in [1, 3] and n in [1, 4];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26564 
// ks = 3 with m = 3, n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26582 
// ks = 3 with m = 3, n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26600 
// cm_stride = 7 with k in {1, 10, 19, 28, 37}, every m in [1, 3] and
// n in [1, 4]; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26621 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, a_offset) {
  // Sweeps k = 1, 10, 19, 28, 37 with m = 3, n = 4, ks = 3 and a_offset = 127.
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26638 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, zero) {
  // Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..2 with m = 3, n = 4,
  // ks = 3 and a_offset = 127.
  TEST_REQUIRES_X86_XOP;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 3; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(cur_k)
          .ks(3)
          .a_offset(127)
          .zero_index(cur_mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26658 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmin) {
  // Single run with m = 3, n = 4, k = 8 and qmin set to 128.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26672 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmax) {
  // Single run with m = 3, n = 4, k = 8 and qmax set to 128.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26686 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm) {
  // Single run with m = 3, n = 4, k = 8 and cm_stride = 7.
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26700 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26701 
26702 
26703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8) {
  // Single run with m = 1, n = 4, k = 8.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
26716 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cn) {
  // Single run with m = 1, n = 4, k = 8 and cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
26730 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  // Sweeps n = 1..4 and m = 1..1 (the m loop runs once) with k = 8 and
  // iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26748 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  // Sweeps m = 1..1 (the loop runs once) with n = 4, k = 8 and iterations
  // set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
26764 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  // Sweeps n = 1..4 with m = 1, k = 8 and iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
26780 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8) {
  // Sweeps k = 1..7 with m = 1 and n = 4.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
26795 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  // Sweeps k = 1..7, n = 1..4 and m = 1..1 (the m loop runs once) with
  // iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26815 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8) {
  // Sweeps k = 9..15 with m = 1 and n = 4.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
26830 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  // Sweeps k = 9..15, n = 1..4 and m = 1..1 (the m loop runs once) with
  // iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26850 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8) {
  // Sweeps k = 16, 24, ..., 80 with m = 1 and n = 4.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
26865 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_subtile) {
  // Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..1 (the m loop runs once)
  // with iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26885 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4) {
  // Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m = 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26902 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  // Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m = 1 and cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26920 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  // Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..1 (the m loop runs
  // once) with iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26940 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4) {
  // Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m = 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26957 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  // Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m = 1 and cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26975 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_subtile) {
  // Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..1 (the m loop runs
  // once) with iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26995 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel) {
  // Sweeps k = 1, 10, 19, 28, 37 with m = 1, n = 4 and ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27011 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel_subtile) {
  // Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..1 (the m loop runs
  // once) with ks = 3 and iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27032 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  // Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m = 1 and ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27050 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  // Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m = 1 and ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27068 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm_subtile) {
  // Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..1 (the m loop runs
  // once) with cm_stride = 7 and iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27089 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, a_offset) {
  // Sweeps k = 1, 10, 19, 28, 37 with m = 1, n = 4, ks = 3 and a_offset = 43.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27106 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, zero) {
  // Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0 (the inner loop runs
  // once) with m = 1, n = 4, ks = 3 and a_offset = 43.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(cur_k)
          .ks(3)
          .a_offset(43)
          .zero_index(cur_mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27126 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmin) {
  // Single run with m = 1, n = 4, k = 8 and qmin set to 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
27140 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmax) {
  // Single run with m = 1, n = 4, k = 8 and qmax set to 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
27154 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm) {
  // Single run with m = 1, n = 4, k = 8 and cm_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
27168 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
27169 
27170 
27171 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8) {
  // Single run with m = 4, n = 4, k = 8.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
27184 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cn) {
  // Single run with m = 4, n = 4, k = 8 and cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
27198 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  // Sweeps n = 1..4 and m = 1..4 with k = 8 and iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27216 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  // Sweeps m = 1..4 with n = 4, k = 8 and iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(cur_m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27232 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  // Sweeps n = 1..4 with m = 4, k = 8 and iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27248 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8) {
  // Sweeps k = 1..7 with m = 4 and n = 4.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27263 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  // Sweeps k = 1..7, n = 1..4 and m = 1..4 with iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27283 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8) {
  // Sweeps k = 9..15 with m = 4 and n = 4.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27298 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  // Sweeps k = 9..15, n = 1..4 and m = 1..4 with iterations set to 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27318 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8) {
  // Sweeps k = 16, 24, ..., 80 with m = 4 and n = 4.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27333 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_subtile) {
  // Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..4 with iterations set
  // to 1.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27353 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4) {
  // Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m = 4.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27370 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  // Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 with m = 4 and cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27388 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  // Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..4 with iterations set
  // to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27408 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4) {
  // Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m = 4.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27425 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  // Sweeps n = 8, 12 and k = 1, 10, 19, 28, 37 with m = 4 and cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27443 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_subtile) {
  // Sweeps n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..4 with iterations set
  // to 1.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27463 
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel) {
  // Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 4 and ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(cur_k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27479 
// small_kernel_subtile: sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and
// m over 1..4 with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27500 
// n_gt_4_small_kernel: sweeps n over 5..7 and k over {1, 10, 19, 28, 37}
// at m = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27518 
// n_div_4_small_kernel: sweeps n over {8, 12} and k over {1, 10, 19, 28, 37}
// at m = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27536 
// strided_cm_subtile: sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and
// m over 1..4 with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27557 
// a_offset: sweeps k over {1, 10, 19, 28, 37} at m = n = 4 with ks(3) and
// a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
27574 
// zero: sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..3 at
// m = n = 4 with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27594 
// qmin: single m = n = 4, k = 8 configuration with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
27608 
// qmax: single m = n = 4, k = 8 configuration with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
27622 
// strided_cm: single m = n = 4, k = 8 configuration with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
27636 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
27637 
27638 
27639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single m = n = 4, k = 8 configuration with no extra options.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27652 
// strided_cn: single m = n = 4, k = 8 configuration with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27666 
// k_eq_8_subtile: sweeps n over 1..4 and m over 1..4 at k = 8 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27684 
// k_eq_8_subtile_m: sweeps m over 1..4 at n = 4, k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27700 
// k_eq_8_subtile_n: sweeps n over 1..4 at m = 4, k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27716 
// k_lt_8: sweeps k over 1..7 at m = n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27731 
// k_lt_8_subtile: sweeps k over 1..7, n over 1..4 and m over 1..4 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27751 
// k_gt_8: sweeps k over 9..15 at m = n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27766 
// k_gt_8_subtile: sweeps k over 9..15, n over 1..4 and m over 1..4 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27786 
// k_div_8: sweeps k over 16, 24, ..., 80 (step 8) at m = n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27801 
// k_div_8_subtile: sweeps k over 16, 24, ..., 80 (step 8), n over 1..4 and
// m over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27821 
// n_gt_4: sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27838 
// n_gt_4_strided_cn: sweeps n over 5..7 and k over {1, 10, 19, 28, 37}
// at m = 4 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27856 
// n_gt_4_subtile: sweeps n over 5..7, k over {1, 10, 19, 28, 37} and
// m over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27876 
// n_div_4: sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27893 
// n_div_4_strided_cn: sweeps n over {8, 12} and k over {1, 10, 19, 28, 37}
// at m = 4 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27911 
// n_div_4_subtile: sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and
// m over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27931 
// small_kernel: sweeps k over {1, 10, 19, 28, 37} at m = n = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27947 
// small_kernel_subtile: sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and
// m over 1..4 with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27968 
// n_gt_4_small_kernel: sweeps n over 5..7 and k over {1, 10, 19, 28, 37}
// at m = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27986 
// n_div_4_small_kernel: sweeps n over {8, 12} and k over {1, 10, 19, 28, 37}
// at m = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28004 
// strided_cm_subtile: sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and
// m over 1..4 with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28025 
// a_offset: sweeps k over {1, 10, 19, 28, 37} at m = n = 4 with ks(3) and
// a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28042 
// zero: sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..3 at
// m = n = 4 with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28062 
// qmin: single m = n = 4, k = 8 configuration with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28076 
// qmax: single m = n = 4, k = 8 configuration with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28090 
// strided_cm: single m = n = 4, k = 8 configuration with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28104 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
28105 
28106 
28107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single m = 1, n = 4, k = 8 configuration with no extra options.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28120 
// strided_cn: single m = 1, n = 4, k = 8 configuration with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
28134 
// k_eq_8_subtile: sweeps n over 1..4 and m over 1..1 (a single value) at
// k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28152 
// k_eq_8_subtile_m: sweeps m over 1..1 (a single value) at n = 4, k = 8
// with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28168 
// k_eq_8_subtile_n: sweeps n over 1..4 at m = 1, k = 8 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28184 
// k_lt_8: sweeps k over 1..7 at m = 1, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28199 
// k_lt_8_subtile: sweeps k over 1..7, n over 1..4 and m over 1..1 (a single
// value) with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28219 
// k_gt_8: sweeps k over 9..15 at m = 1, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28234 
// k_gt_8_subtile: sweeps k over 9..15, n over 1..4 and m over 1..1 (a single
// value) with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254 
// k_div_8: sweeps k over 16, 24, ..., 80 (step 8) at m = 1, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28269 
// k_div_8_subtile: sweeps k over 16, 24, ..., 80 (step 8), n over 1..4 and
// m over 1..1 (a single value) with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28289 
// n_gt_4: sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28306 
// n_gt_4_strided_cn: sweeps n over 5..7 and k over {1, 10, 19, 28, 37}
// at m = 1 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28324 
// n_gt_4_subtile: sweeps n over 5..7, k over {1, 10, 19, 28, 37} and
// m over 1..1 (a single value) with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28344 
// Sweeps n = 8 and 12 against k = 1, 10, 19, 28, 37 with m held at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28361 
// Sweeps n = 8 and 12 against k = 1, 10, 19, 28, 37 with m held at 1 and
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28379 
// Sweeps n = 8 and 12, k = 1, 10, 19, 28, 37 and m over its single value 1,
// running each combination with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28399 
// Sweeps k = 1, 10, 19, 28, 37 at m = 1, n = 4 with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28415 
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m over its single value 1,
// with ks(3) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28436 
// Sweeps n = 5..7 against k = 1, 10, 19, 28, 37 with m held at 1 and
// ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28454 
// Sweeps n = 8 and 12 against k = 1, 10, 19, 28, 37 with m held at 1 and
// ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28472 
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m over its single value 1,
// with cm_stride(7) and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28493 
// Sweeps k = 1, 10, 19, 28, 37 at m = 1, n = 4 with ks(3) and a_offset(43)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28510 
// Sweeps k = 1, 10, 19, 28, 37 at m = 1, n = 4 with ks(3) and a_offset(43),
// passing each zero_index in [0, 1) (i.e. only 0) to the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28530 
// Single run at m = 1, n = 4, k = 8 with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28544 
// Single run at m = 1, n = 4, k = 8 with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28558 
// Single run at m = 1, n = 4, k = 8 with cm_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28572 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
28573 
28574 
28575 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 2, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28588 
// Single run at m = 2, n = 4, k = 8 with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28602 
// Sweeps n = 1..4 and m = 1..2 at k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28620 
// Sweeps m = 1..2 at n = 4, k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28636 
// Sweeps n = 1..4 at m = 2, k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28652 
// Sweeps k = 1..7 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28667 
// Sweeps k = 1..7, n = 1..4 and m = 1..2, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28687 
// Sweeps k = 9..15 at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28702 
// Sweeps k = 9..15, n = 1..4 and m = 1..2, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28722 
// Sweeps k = 16, 24, ..., 80 (step 8) at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28737 
// Sweeps k = 16, 24, ..., 80 (step 8), n = 1..4 and m = 1..2, each with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28757 
// Sweeps n = 5..7 against k = 1, 10, 19, 28, 37 with m held at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28774 
// Sweeps n = 5..7 against k = 1, 10, 19, 28, 37 with m held at 2 and
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28792 
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..2, each with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28812 
// Sweeps n = 8 and 12 against k = 1, 10, 19, 28, 37 with m held at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28829 
// Sweeps n = 8 and 12 against k = 1, 10, 19, 28, 37 with m held at 2 and
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28847 
// Sweeps n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1..2, each with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28867 
// Sweeps k = 1, 10, 19, 28, 37 at m = 2, n = 4 with ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28883 
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2, with ks(3) and
// iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28904 
// Sweeps n = 5..7 against k = 1, 10, 19, 28, 37 with m held at 2 and
// ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28922 
// Sweeps n = 8 and 12 against k = 1, 10, 19, 28, 37 with m held at 2 and
// ks(3) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28940 
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2, with cm_stride(7)
// and iterations(1) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28961 
// Sweeps k = 1, 10, 19, 28, 37 at m = 2, n = 4 with ks(3) and a_offset(83)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28978 
// Sweeps k = 1, 10, 19, 28, 37 at m = 2, n = 4 with ks(3) and a_offset(83),
// passing zero_index 0 and then 1 to the tester for every k.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28998 
// Single run at m = 2, n = 4, k = 8 with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29012 
// Single run at m = 2, n = 4, k = 8 with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29026 
// Single run at m = 2, n = 4, k = 8 with cm_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29040 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29041 
29042 
29043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 3, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29056 
// Single run at m = 3, n = 4, k = 8 with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29070 
// Sweeps n = 1..4 and m = 1..3 at k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29088 
// Sweeps m = 1..3 at n = 4, k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29104 
// Sweeps n = 1..4 at m = 3, k = 8, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29120 
// Sweeps k = 1..7 at m = 3, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29135 
// Sweeps k = 1..7, n = 1..4 and m = 1..3, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29155 
// Sweeps k = 9..15 at m = 3, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29170 
// Sweeps k = 9..15, n = 1..4 and m = 1..3, each with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29190 
// Sweeps k over 16, 24, ..., 80 (step 8) with m=3 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29205 
// Sweeps k over 16, 24, ..., 80 (step 8) and every (m, n) with m in 1..3 and n in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29225 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29242 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29260 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..3, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29280 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29297 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29315 
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..3, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29335 
// Sweeps k over 1, 10, 19, 28, 37 with m=3, n=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29351 
// Sweeps k over 1, 10, 19, 28, 37 and every (m, n) with m in 1..3, n in 1..4; ks is 3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29372 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=3 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29390 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=3 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29408 
// Sweeps k over 1, 10, 19, 28, 37 and every (m, n) with m in 1..3, n in 1..4; cm_stride is 7, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29429 
// Sweeps k over 1, 10, 19, 28, 37 with m=3, n=4, ks set to 3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29446 
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..2 with m=3, n=4, ks=3 and a_offset=127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29466 
// Single run at m=3, n=4, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29480 
// Single run at m=3, n=4, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29494 
// Single run at m=3, n=4, k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29508 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29509 
29510 
29511 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=4, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29524 
// Single run at m=4, n=4, k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29538 
// Runs k=8 for every (m, n) with m in 1..4 and n in 1..4, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29556 
// Runs k=8, n=4 for m in 1..4, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29572 
// Runs k=8, m=4 for n in 1..4, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29588 
// Sweeps k over 1..7 with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29603 
// Sweeps k over 1..7 and every (m, n) with m in 1..4 and n in 1..4, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29623 
// Sweeps k over 9..15 with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29638 
// Sweeps k over 9..15 and every (m, n) with m in 1..4 and n in 1..4, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29658 
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29673 
// Sweeps k over 16, 24, ..., 80 (step 8) and every (m, n) with m in 1..4 and n in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29693 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29710 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29728 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..4, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29748 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29765 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29783 
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..4, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29803 
// Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29819 
// Sweeps k over 1, 10, 19, 28, 37 and every (m, n) with m in 1..4, n in 1..4; ks is 3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29840 
// Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29858 
// Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m=4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29876 
// Sweeps k over 1, 10, 19, 28, 37 and every (m, n) with m in 1..4, n in 1..4; cm_stride is 7, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29897 
// Sweeps k over 1, 10, 19, 28, 37 with m=4, n=4, ks set to 3 and a_offset set to 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29914 
// Sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..3 with m=4, n=4, ks=3 and a_offset=163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29934 
// Single run at m=4, n=4, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29948 
// Single run at m=4, n=4, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29962 
// Single run at m=4, n=4, k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29976 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29977 
29978 
29979 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29992 
// Single run at m=1, n=4, k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30006 
// Runs k=8 for every (m, n) with m in 1..1 and n in 1..4, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30024 
// Runs k=8, n=4 for m in 1..1 (a single value), one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30040 
// Runs k=8, m=1 for n in 1..4, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30056 
// k_lt_8: full 1x4 tile; sweeps k over [1, 7] (every k below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30071 
// k_lt_8_subtile: sweeps k over [1, 7], n over [1, 4] and m over [1, 1],
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30091 
// k_gt_8: full 1x4 tile; sweeps k over [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30106 
// k_gt_8_subtile: sweeps k over [9, 15], n over [1, 4] and m over [1, 1],
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30126 
// k_div_8: full 1x4 tile; sweeps k over the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30141 
// k_div_8_subtile: sweeps k over the multiples of 8 from 16 to 80, n over
// [1, 4] and m over [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30161 
// n_gt_4: m = 1; sweeps n over [5, 7] (above nr = 4) and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30178 
// n_gt_4_strided_cn: m = 1, cn_stride = 7; sweeps n over [5, 7] and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30196 
// n_gt_4_subtile: sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over
// [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30216 
// n_div_4: m = 1; sweeps n over {8, 12} (multiples of nr = 4) and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30233 
// n_div_4_strided_cn: m = 1, cn_stride = 7; sweeps n over {8, 12} and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30251 
// n_div_4_subtile: sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m
// over [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30271 
// small_kernel: full 1x4 tile with ks = 3; sweeps k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30287 
// small_kernel_subtile: ks = 3; sweeps k over {1, 10, 19, 28, 37}, n over
// [1, 4] and m over [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30308 
// n_gt_4_small_kernel: m = 1, ks = 3; sweeps n over [5, 7] and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30326 
// n_div_4_small_kernel: m = 1, ks = 3; sweeps n over {8, 12} and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30344 
// strided_cm_subtile: cm_stride = 7; sweeps k over {1, 10, 19, 28, 37}, n over
// [1, 4] and m over [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30365 
// a_offset: full 1x4 tile with ks = 3 and a_offset = 43; sweeps k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30382 
// zero: full 1x4 tile with ks = 3 and a_offset = 43; sweeps k over
// {1, 10, 19, 28, 37} and zero_index over every row index below mr (only 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30402 
// qmin: full 1x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30416 
// qmax: full 1x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30430 
// strided_cm: full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30444 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
30445 
30446 
30447 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 2x4 tile with k equal to kr = 8, all other tester settings
// left at their defaults.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30460 
// strided_cn: full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30474 
// k_eq_8_subtile: k = 8; sweeps every (m, n) with m in [1, 2] and n in [1, 4],
// running a single iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30492 
// k_eq_8_subtile_m: k = 8, n = 4; sweeps m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30508 
// k_eq_8_subtile_n: k = 8, m = 2; sweeps n over [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30524 
// k_lt_8: full 2x4 tile; sweeps k over [1, 7] (every k below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30539 
// k_lt_8_subtile: sweeps k over [1, 7], n over [1, 4] and m over [1, 2],
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30559 
// k_gt_8: full 2x4 tile; sweeps k over [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30574 
// k_gt_8_subtile: sweeps k over [9, 15], n over [1, 4] and m over [1, 2],
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30594 
// k_div_8: full 2x4 tile; sweeps k over the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30609 
// k_div_8_subtile: sweeps k over the multiples of 8 from 16 to 80, n over
// [1, 4] and m over [1, 2], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30629 
// n_gt_4: m = 2; sweeps n over [5, 7] (above nr = 4) and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30646 
// n_gt_4_strided_cn: m = 2, cn_stride = 7; sweeps n over [5, 7] and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30664 
// n_gt_4_subtile: sweeps n over [5, 7], k over {1, 10, 19, 28, 37} and m over
// [1, 2], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30684 
// n_div_4: m = 2; sweeps n over {8, 12} (multiples of nr = 4) and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30701 
// n_div_4_strided_cn: m = 2, cn_stride = 7; sweeps n over {8, 12} and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30719 
// n_div_4_subtile: sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m
// over [1, 2], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30739 
// small_kernel: full 2x4 tile with ks = 3; sweeps k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30755 
// small_kernel_subtile: ks = 3; sweeps k over {1, 10, 19, 28, 37}, n over
// [1, 4] and m over [1, 2], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30776 
// n_gt_4_small_kernel: m = 2, ks = 3; sweeps n over [5, 7] and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30794 
// n_div_4_small_kernel: m = 2, ks = 3; sweeps n over {8, 12} and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30812 
// strided_cm_subtile: cm_stride = 7; sweeps k over {1, 10, 19, 28, 37}, n over
// [1, 4] and m over [1, 2], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30833 
// a_offset: full 2x4 tile with ks = 3 and a_offset = 83; sweeps k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
30850 
// zero: full 2x4 tile with ks = 3 and a_offset = 83; sweeps k over
// {1, 10, 19, 28, 37} and zero_index over every row index below mr (0 and 1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
30870 
// qmin: full 2x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30884 
// qmax: full 2x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30898 
// strided_cm: full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30912 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
30913 
30914 
30915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 3x4 tile with k equal to kr = 8, all other tester settings
// left at their defaults. The SSSE3 kernel is paired with the SSE2 params
// initializer.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
30928 
// 3x4c8 SSSE3 ld64 kernel, m=3, n=4, k=8, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
30942 
// k=8; sweeps n over 1..4 (outer) and m over 1..3 (inner), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
30960 
// k=8, n=4; sweeps m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
30976 
// k=8, m=3; sweeps n over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(nc).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
30992 
// m=3, n=4; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
31007 
// Sweeps k over 1..7, n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31027 
// m=3, n=4; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
31042 
// Sweeps k over 9..15, n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31062 
// m=3, n=4; sweeps k over 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
31077 
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31097 
// m=3; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31114 
// m=3, cn_stride=7; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31132 
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31152 
// m=3; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31169 
// m=3, cn_stride=7; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31187 
// Sweeps n over {8, 12}, k over 1..40 (step 9) and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31207 
// m=3, n=4, ks=3; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
31223 
// ks=3; sweeps k over 1..40 (step 9), n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31244 
// m=3, ks=3; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31262 
// m=3, ks=3; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31280 
// cm_stride=7; sweeps k over 1..40 (step 9), n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31301 
// m=3, n=4, ks=3, a_offset=127; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, a_offset) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc).ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
31318 
// m=3, n=4, ks=3, a_offset=127; sweeps k over 1..40 (step 9) and zero_index over 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, zero) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(kc).ks(3).a_offset(127).zero_index(zi);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31338 
// 3x4c8 SSSE3 ld64 kernel, m=3, n=4, k=8, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, qmin) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
31352 
// 3x4c8 SSSE3 ld64 kernel, m=3, n=4, k=8, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, qmax) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
31366 
// 3x4c8 SSSE3 ld64 kernel, m=3, n=4, k=8, with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
31380 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
31381 
31382 
31383 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 SSE4.1 ld64 kernel with m=3, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
31396 
// 3x4c8 SSE4.1 ld64 kernel, m=3, n=4, k=8, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
31410 
// k=8; sweeps n over 1..4 (outer) and m over 1..3 (inner), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31428 
// k=8, n=4; sweeps m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31444 
// k=8, m=3; sweeps n over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(nc).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31460 
// m=3, n=4; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31475 
// Sweeps k over 1..7, n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31495 
// m=3, n=4; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31510 
// Sweeps k over 9..15, n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31530 
// m=3, n=4; sweeps k over 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31545 
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31565 
// m=3; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31582 
// m=3, cn_stride=7; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31600 
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31620 
// m=3; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31637 
// m=3, cn_stride=7; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31655 
// Sweeps n over {8, 12}, k over 1..40 (step 9) and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31675 
// m=3, n=4, ks=3; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31691 
// ks=3; sweeps k over 1..40 (step 9), n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31712 
// m=3, ks=3; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31730 
// m=3, ks=3; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31748 
// cm_stride=7; sweeps k over 1..40 (step 9), n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31769 
// m=3, n=4, ks=3, a_offset=127; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(kc).ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31786 
// zero: 3x4c8 SSE4.1 ld64 kernel with ks(3) and a_offset(127); for each k in
// {1, 10, 19, 28, 37} the zero_index is set to 0, 1 and 2 in turn.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zidx = 0; zidx < 3; ++zidx) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kk)
        .ks(3)
        .a_offset(127)
        .zero_index(zidx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31806 
// qmin: single run of the 3x4c8 SSE4.1 ld64 kernel at m=3, n=4, k=8 with
// qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31820 
// qmax: single run of the 3x4c8 SSE4.1 ld64 kernel at m=3, n=4, k=8 with
// qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31834 
// strided_cm: single run of the 3x4c8 SSE4.1 ld64 kernel at m=3, n=4, k=8
// with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31848 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
31849 
31850 
31851 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 2x4c8 AVX ld64 kernel at m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31864 
// strided_cn: single run of the 2x4c8 AVX ld64 kernel at m=2, n=4, k=8 with
// cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31878 
// k_eq_8_subtile: 2x4c8 AVX ld64 kernel at k=8 for every n in [1, 4] and
// m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31896 
// k_eq_8_subtile_m: 2x4c8 AVX ld64 kernel at n=4, k=8 for m in [1, 2],
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(mm).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31912 
// k_eq_8_subtile_n: 2x4c8 AVX ld64 kernel at m=2, k=8 for n in [1, 4],
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31928 
// k_lt_8: 2x4c8 AVX ld64 kernel at m=2, n=4 for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31943 
// k_lt_8_subtile: 2x4c8 AVX ld64 kernel swept over k in [1, 7], n in [1, 4]
// and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31963 
// k_gt_8: 2x4c8 AVX ld64 kernel at m=2, n=4 for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31978 
// k_gt_8_subtile: 2x4c8 AVX ld64 kernel swept over k in [9, 15], n in [1, 4]
// and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31998 
// k_div_8: 2x4c8 AVX ld64 kernel at m=2, n=4 for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32013 
// k_div_8_subtile: 2x4c8 AVX ld64 kernel swept over k = 16, 24, ..., 80,
// n in [1, 4] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32033 
// n_gt_4: 2x4c8 AVX ld64 kernel at m=2 for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32050 
// n_gt_4_strided_cn: 2x4c8 AVX ld64 kernel with cn_stride(7) at m=2 for
// n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32068 
// n_gt_4_subtile: 2x4c8 AVX ld64 kernel swept over n in [5, 7],
// k in {1, 10, 19, 28, 37} and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32088 
// n_div_4: 2x4c8 AVX ld64 kernel at m=2 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32105 
// n_div_4_strided_cn: 2x4c8 AVX ld64 kernel with cn_stride(7) at m=2 for
// n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32123 
// n_div_4_subtile: 2x4c8 AVX ld64 kernel swept over n in {8, 12},
// k in {1, 10, 19, 28, 37} and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32143 
// small_kernel: 2x4c8 AVX ld64 kernel with ks(3) at m=2, n=4 for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32159 
// small_kernel_subtile: 2x4c8 AVX ld64 kernel with ks(3), swept over
// k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32180 
// n_gt_4_small_kernel: 2x4c8 AVX ld64 kernel with ks(3) at m=2 for
// n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32198 
// n_div_4_small_kernel: 2x4c8 AVX ld64 kernel with ks(3) at m=2 for
// n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32216 
// strided_cm_subtile: 2x4c8 AVX ld64 kernel with cm_stride(7), swept over
// k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32237 
// a_offset: 2x4c8 AVX ld64 kernel with ks(3) and a_offset(83), run at
// m=2, n=4 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32254 
// zero: 2x4c8 AVX ld64 kernel with ks(3) and a_offset(83); for each k in
// {1, 10, 19, 28, 37} the zero_index is set to 0 and 1 in turn.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zidx = 0; zidx < 2; ++zidx) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kk)
        .ks(3)
        .a_offset(83)
        .zero_index(zidx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32274 
// qmin: single run of the 2x4c8 AVX ld64 kernel at m=2, n=4, k=8 with
// qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32288 
// qmax: single run of the 2x4c8 AVX ld64 kernel at m=2, n=4, k=8 with
// qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32302 
// strided_cm: single run of the 2x4c8 AVX ld64 kernel at m=2, n=4, k=8 with
// cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32316 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
32317 
32318 
32319 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 2x4c8 XOP ld64 kernel at m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32332 
// strided_cn: single run of the 2x4c8 XOP ld64 kernel at m=2, n=4, k=8 with
// cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32346 
// k_eq_8_subtile: 2x4c8 XOP ld64 kernel at k=8 for every n in [1, 4] and
// m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32364 
// k_eq_8_subtile_m: 2x4c8 XOP ld64 kernel at n=4, k=8 for m in [1, 2],
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(mm).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32380 
// k_eq_8_subtile_n: 2x4c8 XOP ld64 kernel at m=2, k=8 for n in [1, 4],
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32396 
// k_lt_8: 2x4c8 XOP ld64 kernel at m=2, n=4 for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32411 
// k_lt_8_subtile: 2x4c8 XOP ld64 kernel swept over k in [1, 7], n in [1, 4]
// and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32431 
// k_gt_8: 2x4c8 XOP ld64 kernel at m=2, n=4 for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32446 
// k_gt_8_subtile: 2x4c8 XOP ld64 kernel swept over k in [9, 15], n in [1, 4]
// and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32466 
// k_div_8: 2x4c8 XOP ld64 kernel at m=2, n=4 for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32481 
// k_div_8_subtile: 2x4c8 XOP ld64 kernel swept over k = 16, 24, ..., 80,
// n in [1, 4] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32501 
// n_gt_4: 2x4c8 XOP ld64 kernel at m=2 for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32518 
// n_gt_4_strided_cn: 2x4c8 XOP ld64 kernel with cn_stride(7) at m=2 for
// n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32536 
// n_gt_4_subtile: 2x4c8 XOP ld64 kernel swept over n in [5, 7],
// k in {1, 10, 19, 28, 37} and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32556 
// n_div_4: 2x4c8 XOP ld64 kernel at m=2 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32573 
// n_div_4_strided_cn: 2x4c8 XOP ld64 kernel with cn_stride(7) at m=2 for
// n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32591 
// n_div_4_subtile: 2x4c8 XOP ld64 kernel swept over n in {8, 12},
// k in {1, 10, 19, 28, 37} and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32611 
// small_kernel: 2x4c8 XOP ld64 kernel with ks(3) at m=2, n=4 for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester{};
    tester
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kk)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32627 
// small_kernel_subtile: 2x4c8 XOP ld64 kernel with ks(3), swept over
// k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester
          .mr(2).nr(4).kr(8).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32648 
// ks(3) with n = 5..7 (larger than nr = 4) and k = 1..40 in steps of 9, m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32666 
// ks(3) with n in {8, 12} (multiples of nr = 4) and k = 1..40 in steps of 9, m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32684 
// cm_stride(7) combined with every sub-tile: k = 1..40 in steps of 9, n = 1..4,
// m = 1..2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32705 
// Full 2x4 tile with ks(3) and a_offset(83), for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32722 
// Full 2x4 tile with ks(3) and a_offset(83); zero_index takes each value in
// 0..1 for every k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(k_dim)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32742 
// Full 2x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
32756 
// Full 2x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
32770 
// Full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
32784 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
32785 
32786 
32787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile with k equal to kr = 8; no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
32800 
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
32814 
// k fixed at 8; every sub-tile with n = 1..4 and m = 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32832 
// k fixed at 8 and n fixed at 4; m varies over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32848 
// k fixed at 8 and m fixed at 3; n varies over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32864 
// Full 3x4 tile with k below kr = 8: k = 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32879 
// k = 1..7 crossed with every sub-tile (n = 1..4, m = 1..3), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32899 
// Full 3x4 tile with k just above kr = 8: k = 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32914 
// k = 9..15 crossed with every sub-tile (n = 1..4, m = 1..3), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32934 
// Full 3x4 tile with k a multiple of kr = 8: k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32949 
// k = 16, 24, ..., 80 crossed with every sub-tile (n = 1..4, m = 1..3),
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32969 
// n = 5..7 (larger than nr = 4) with k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32986 
// n = 5..7 with k = 1..40 in steps of 9, m fixed at 3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33004 
// n = 5..7, k = 1..40 in steps of 9 and m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
33024 
// n in {8, 12} (multiples of nr = 4) with k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33041 
// n in {8, 12} with k = 1..40 in steps of 9, m fixed at 3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33059 
// n in {8, 12}, k = 1..40 in steps of 9 and m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
33079 
// Full 3x4 tile with ks(3), for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
33095 
// ks(3) combined with every sub-tile: k = 1..40 in steps of 9, n = 1..4,
// m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
33116 
// ks(3) with n = 5..7 (larger than nr = 4) and k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33134 
// ks(3) with n in {8, 12} (multiples of nr = 4) and k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33152 
// cm_stride(7) combined with every sub-tile: k = 1..40 in steps of 9, n = 1..4,
// m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
33173 
// Full 3x4 tile with ks(3) and a_offset(127), for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
33190 
// Full 3x4 tile with ks(3) and a_offset(127); zero_index takes each value in
// 0..2 for every k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33210 
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
33224 
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
33238 
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
33252 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
33253 
33254 
33255 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile with k equal to kr = 8; no extra tester options.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
33268 
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
33282 
// k fixed at 8; every sub-tile with n = 1..4 and m = 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33300 
// k fixed at 8 and n fixed at 4; m varies over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
33316 
// k fixed at 8 and m fixed at 3; n varies over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
33332 
// Full 3x4 tile with k below kr = 8: k = 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
33347 
// k = 1..7 crossed with every sub-tile (n = 1..4, m = 1..3), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
33367 
// Full 3x4 tile with k just above kr = 8: k = 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
33382 
// k = 9..15 crossed with every sub-tile (n = 1..4, m = 1..3), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
33402 
// Full 3x4 tile with k a multiple of kr = 8: k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
33417 
// k = 16, 24, ..., 80 crossed with every sub-tile (n = 1..4, m = 1..3),
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
33437 
// n = 5..7 (larger than nr = 4) with k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33454 
// n = 5..7 with k = 1..40 in steps of 9, m fixed at 3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33472 
// n = 5..7, k = 1..40 in steps of 9 and m = 1..3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
33492 
// n in {8, 12} (multiples of nr = 4) with k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33509 
// n in {8, 12} with k = 1..40 in steps of 9, m fixed at 3 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
33527 
// 3x4c8 XOP ld64 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, and every m in
// 1..3, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_sz = 8; n_sz <= 12; n_sz += 4) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      for (uint32_t m_sz = 1; m_sz <= 3; ++m_sz) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33547 
// 3x4c8 XOP ld64 kernel: full 3x4 tile with ks set to 3, k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_sz)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33563 
// 3x4c8 XOP ld64 kernel: ks set to 3, k in {1, 10, 19, 28, 37}, every n in 1..4
// and every m in 1..3, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 3; ++m_sz) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33584 
// 3x4c8 XOP ld64 kernel: ks set to 3, n in 5..7, k in {1, 10, 19, 28, 37},
// with m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_sz = 5; n_sz < 8; ++n_sz) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_sz).k(k_sz)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33602 
// 3x4c8 XOP ld64 kernel: ks set to 3, n in {8, 12}, k in {1, 10, 19, 28, 37},
// with m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_sz = 8; n_sz <= 12; n_sz += 4) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(n_sz).k(k_sz)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33620 
// 3x4c8 XOP ld64 kernel: cm_stride set to 7, k in {1, 10, 19, 28, 37}, every n
// in 1..4 and every m in 1..3, with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 3; ++m_sz) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33641 
// 3x4c8 XOP ld64 kernel: full 3x4 tile with ks set to 3 and a_offset set to
// 127, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_sz)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33658 
// 3x4c8 XOP ld64 kernel: full 3x4 tile with ks set to 3 and a_offset set to
// 127; for each k in {1, 10, 19, 28, 37} the zero_index is swept over 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_sz)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33678 
// 3x4c8 XOP ld64 kernel: full 3x4 tile, k = 8, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
33692 
// 3x4c8 XOP ld64 kernel: full 3x4 tile, k = 8, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
33706 
// 3x4c8 XOP ld64 kernel: full 3x4 tile, k = 8, with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
33720 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
33721 
33722 
33723 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile with k = 8 (equal to kr).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
33736 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile, k = 8, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
33750 
// 1x4c8 SSE2 ld128 kernel: k = 8, every n in 1..4 combined with every m in
// 1..1, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
    for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_sz).n(n_sz).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33768 
// 1x4c8 SSE2 ld128 kernel: k = 8, n = 4, every m in 1..1, with iterations set
// to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_sz).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33784 
// 1x4c8 SSE2 ld128 kernel: k = 8, m = 1, every n in 1..4, with iterations set
// to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_sz).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33800 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile with every k in 1..7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz < 8; ++k_sz) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_sz)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33815 
// 1x4c8 SSE2 ld128 kernel: every k in 1..7, every n in 1..4 and every m in
// 1..1, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz < 8; ++k_sz) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33835 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile with every k in 9..15 (above kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 9; k_sz < 16; ++k_sz) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_sz)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33850 
// 1x4c8 SSE2 ld128 kernel: every k in 9..15, every n in 1..4 and every m in
// 1..1, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 9; k_sz < 16; ++k_sz) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33870 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile with k stepping through the multiples
// of 8 from 16 to 80 inclusive.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 16; k_sz <= 80; k_sz += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_sz)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33885 
// 1x4c8 SSE2 ld128 kernel: k over the multiples of 8 from 16 to 80, every n in
// 1..4 and every m in 1..1, with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 16; k_sz <= 80; k_sz += 8) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33905 
// 1x4c8 SSE2 ld128 kernel: n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37},
// with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 5; n_sz < 8; ++n_sz) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_sz).k(k_sz)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33922 
// 1x4c8 SSE2 ld128 kernel: n in 5..7, k in {1, 10, 19, 28, 37}, m fixed at 1,
// with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 5; n_sz < 8; ++n_sz) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_sz).k(k_sz)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33940 
// 1x4c8 SSE2 ld128 kernel: n in 5..7, k in {1, 10, 19, 28, 37}, and every m in
// 1..1, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 5; n_sz < 8; ++n_sz) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33960 
// 1x4c8 SSE2 ld128 kernel: n in {8, 12} (multiples of nr = 4), k in
// {1, 10, 19, 28, 37}, with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 8; n_sz <= 12; n_sz += 4) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_sz).k(k_sz)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33977 
// 1x4c8 SSE2 ld128 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m fixed at
// 1, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 8; n_sz <= 12; n_sz += 4) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_sz).k(k_sz)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33995 
// 1x4c8 SSE2 ld128 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, and every m
// in 1..1, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 8; n_sz <= 12; n_sz += 4) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34015 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile with ks set to 3, k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_sz)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34031 
// 1x4c8 SSE2 ld128 kernel: ks set to 3, k in {1, 10, 19, 28, 37}, every n in
// 1..4 and every m in 1..1, with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34052 
// 1x4c8 SSE2 ld128 kernel: ks set to 3, n in 5..7, k in {1, 10, 19, 28, 37},
// with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 5; n_sz < 8; ++n_sz) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_sz).k(k_sz)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34070 
// 1x4c8 SSE2 ld128 kernel: ks set to 3, n in {8, 12}, k in
// {1, 10, 19, 28, 37}, with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 8; n_sz <= 12; n_sz += 4) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_sz).k(k_sz)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34088 
// 1x4c8 SSE2 ld128 kernel: cm_stride set to 7, k in {1, 10, 19, 28, 37}, every
// n in 1..4 and every m in 1..1, with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 1; ++m_sz) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34109 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile with ks set to 3 and a_offset set to
// 43, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_sz)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34126 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile with ks set to 3 and a_offset set to
// 43; for each k in {1, 10, 19, 28, 37} the zero_index is swept over 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(k_sz)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34146 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile, k = 8, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34160 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile, k = 8, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34174 
// 1x4c8 SSE2 ld128 kernel: full 1x4 tile, k = 8, with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34188 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
34189 
34190 
34191 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c8 SSE2 ld128 kernel: full 2x4 tile with k = 8 (equal to kr).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34204 
// 2x4c8 SSE2 ld128 kernel: full 2x4 tile, k = 8, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34218 
// 2x4c8 SSE2 ld128 kernel: k = 8, every n in 1..4 combined with every m in
// 1..2, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
    for (uint32_t m_sz = 1; m_sz <= 2; ++m_sz) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_sz).n(n_sz).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34236 
// 2x4c8 SSE2 ld128 kernel: k = 8, n = 4, every m in 1..2, with iterations set
// to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_sz = 1; m_sz <= 2; ++m_sz) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_sz).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34252 
// 2x4c8 SSE2 ld128 kernel: k = 8, m = 2, every n in 1..4, with iterations set
// to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_sz).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34268 
// 2x4c8 SSE2 ld128 kernel: full 2x4 tile with every k in 1..7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz < 8; ++k_sz) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_sz)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34283 
// 2x4c8 SSE2 ld128 kernel: every k in 1..7, every n in 1..4 and every m in
// 1..2, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 1; k_sz < 8; ++k_sz) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 2; ++m_sz) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34303 
// 2x4c8 SSE2 ld128 kernel: full 2x4 tile with every k in 9..15 (above kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 9; k_sz < 16; ++k_sz) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_sz)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34318 
// 2x4c8 SSE2 ld128 kernel: every k in 9..15, every n in 1..4 and every m in
// 1..2, with iterations set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 9; k_sz < 16; ++k_sz) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 2; ++m_sz) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34338 
// 2x4c8 SSE2 ld128 kernel: full 2x4 tile with k stepping through the multiples
// of 8 from 16 to 80 inclusive.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 16; k_sz <= 80; k_sz += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_sz)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34353 
// 2x4c8 SSE2 ld128 kernel: k over the multiples of 8 from 16 to 80, every n in
// 1..4 and every m in 1..2, with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_sz = 16; k_sz <= 80; k_sz += 8) {
    for (uint32_t n_sz = 1; n_sz <= 4; ++n_sz) {
      for (uint32_t m_sz = 1; m_sz <= 2; ++m_sz) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_sz).n(n_sz).k(k_sz)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34373 
// 2x4c8 SSE2 ld128 kernel: n in 5..7 (above nr = 4), k in {1, 10, 19, 28, 37},
// with m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_sz = 5; n_sz < 8; ++n_sz) {
    for (size_t k_sz = 1; k_sz <= 40; k_sz += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_sz).k(k_sz)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34390 
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m = 2, cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34408 
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m = 1, 2;
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34428 
// n = 8, 12 (multiples of nr = 4) crossed with k = 1, 10, 19, 28, 37; m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34445 
// n = 8, 12 crossed with k = 1, 10, 19, 28, 37; m = 2, cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34463 
// n = 8, 12 crossed with k = 1, 10, 19, 28, 37 and m = 1, 2;
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34483 
// m = 2, n = 4 with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34499 
// ks(3) with k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 2];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3).iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34520 
// ks(3) with n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34538 
// ks(3) with n = 8, 12 crossed with k = 1, 10, 19, 28, 37; m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34556 
// cm_stride(7) with k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 2];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7).iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34577 
// m = 2, n = 4 with ks(3) and a_offset(83), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3).a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34594 
// ks(3) and a_offset(83) with zero_index = 0, 1, for k = 1, 10, 19, 28, 37;
// m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .ks(3).a_offset(83).zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34614 
// Single run: m = 2, n = 4, k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34628 
// Single run: m = 2, n = 4, k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34642 
// Single run: m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34656 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
34657 
34658 
34659 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run: m = 3, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34672 
// Single run: m = 3, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34686 
// k = 8 for every (n, m) with n in [1, 4] and m in [1, 3]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34704 
// k = 8, n = 4 for m = 1, 2, 3; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34720 
// k = 8, m = 3 for n = 1, 2, 3, 4; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34736 
// m = 3, n = 4 for k = 1 through 7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34751 
// k = 1 through 7 for every (n, m) with n in [1, 4] and m in [1, 3];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34771 
// m = 3, n = 4 for k = 9 through 15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34786 
// k = 9 through 15 for every (n, m) with n in [1, 4] and m in [1, 3];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34806 
// Runs m = 3, n = 4 for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34821 
// For k = 16, 24, ..., 80: every (n, m) with n in [1, 4] and m in [1, 3],
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34841 
// n = 5, 6, 7 (above nr = 4) crossed with k = 1, 10, 19, 28, 37; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34858 
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m = 3, cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34876 
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m = 1, 2, 3;
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34896 
// n = 8, 12 (multiples of nr = 4) crossed with k = 1, 10, 19, 28, 37; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34913 
// n = 8, 12 crossed with k = 1, 10, 19, 28, 37; m = 3, cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34931 
// n = 8, 12 crossed with k = 1, 10, 19, 28, 37 and m = 1, 2, 3;
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34951 
// m = 3, n = 4 with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34967 
// ks(3) with k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 3];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3).iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34988 
// ks(3) with n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35006 
// ks(3) with n = 8, 12 crossed with k = 1, 10, 19, 28, 37; m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35024 
// cm_stride(7) with k = 1, 10, 19, 28, 37, n in [1, 4] and m in [1, 3];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7).iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35045 
// m = 3, n = 4 with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, a_offset) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .ks(3).a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
35062 
// ks(3) and a_offset(127) with zero_index = 0, 1, 2,
// for k = 1, 10, 19, 28, 37; m = 3, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, zero) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3).a_offset(127).zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35082 
// Single run: m = 3, n = 4, k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, qmin) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35096 
// Single run: m = 3, n = 4, k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, qmax) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35110 
// Single run: m = 3, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35124 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
35125 
35126 
35127 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run: m = 3, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35140 
// Single run: m = 3, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35154 
// k = 8 for every (n, m) with n in [1, 4] and m in [1, 3]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35172 
// k = 8, n = 4 for m = 1, 2, 3; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35188 
// k = 8, m = 3 for n = 1, 2, 3, 4; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35204 
// m = 3, n = 4 for k = 1 through 7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35219 
// k = 1 through 7 for every (n, m) with n in [1, 4] and m in [1, 3];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35239 
// m = 3, n = 4 for k = 9 through 15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35254 
// k_gt_8_subtile: for every k in [9, 15], sweeps all (m, n) with m in [1, 3]
// and n in [1, 4]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35274 
// k_div_8: runs the kernel with m = 3 and n = 4 for k = 16, 24, ..., 80,
// i.e. multiples of 8 above a single kr-sized step.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35289 
// k_div_8_subtile: for k = 16, 24, ..., 80, sweeps all (m, n) with m in [1, 3]
// and n in [1, 4]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35309 
// n_gt_4: runs the kernel with m = 3 for n in [5, 7] (above nr = 4 but below
// 2 * nr), with k taking the values 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35326 
// n_gt_4_strided_cn: same sweep as n_gt_4 (n in [5, 7], k = 1, 10, ..., 37)
// but with cn_stride(7), a stride larger than nr = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35344 
// n_gt_4_subtile: for n in [5, 7] and k = 1, 10, ..., 37, sweeps m over
// [1, 3]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35364 
// n_div_4: runs the kernel with m = 3 for n = 8 and n = 12 (multiples of
// nr = 4), with k taking the values 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35381 
// n_div_4_strided_cn: same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37)
// but with cn_stride(7), a stride larger than nr = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35399 
// n_div_4_subtile: for n = 8, 12 and k = 1, 10, ..., 37, sweeps m over
// [1, 3]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35419 
// small_kernel: runs the kernel with m = 3, n = 4 and ks(3) for
// k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the kernel (tap) count of the indirect GEMM;
// confirm against GemmMicrokernelTester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35435 
// small_kernel_subtile: with ks(3) and k = 1, 10, ..., 37, sweeps all (m, n)
// with m in [1, 3] and n in [1, 4]; each combination uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35456 
// n_gt_4_small_kernel: combines ks(3) with n in [5, 7] (above nr = 4) at
// m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35474 
// n_div_4_small_kernel: combines ks(3) with n = 8 and n = 12 (multiples of
// nr = 4) at m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35492 
// strided_cm_subtile: with cm_stride(7) (larger than nr = 4) and
// k = 1, 10, ..., 37, sweeps all (m, n) with m in [1, 3] and n in [1, 4];
// each combination uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35513 
// a_offset: runs the kernel with m = 3, n = 4, ks(3) and a_offset(127) for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35530 
// zero: same configuration as a_offset (ks(3), a_offset(127)), additionally
// passing each mz in [0, 2] to zero_index(), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35550 
// qmin: single run with m = 3, n = 4, k = 8 and qmin(128).
// NOTE(review): presumably exercises the lower output clamp; confirm.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35564 
// qmax: single run with m = 3, n = 4, k = 8 and qmax(128).
// NOTE(review): presumably exercises the upper output clamp; confirm.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35578 
// strided_cm: single run with m = 3, n = 4, k = 8 and cm_stride(7), a stride
// larger than nr = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35592 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
35593 
35594 
35595 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 1x4c8 AVX (ld128) kernel with m = mr = 1,
// n = nr = 4 and k equal to the configured kr of 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35608 
// strided_cn: single run with m = 1, n = 4, k = 8 and cn_stride(7), a stride
// larger than nr = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
35622 
// k_eq_8_subtile: with k = 8, sweeps n over [1, 4] and m over [1, 1] (a
// single value because mr = 1); each combination uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35640 
// k_eq_8_subtile_m: with n = 4 and k = 8, sweeps m over [1, 1] (a single
// value because mr = 1), using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35656 
// k_eq_8_subtile_n: with m = 1 and k = 8, sweeps n over [1, 4], using
// iterations(1) for each value.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35672 
// k_lt_8: runs the kernel with m = 1 and n = 4 for every k in [1, 7],
// i.e. each k below the configured kr of 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35687 
// k_lt_8_subtile: for every k in [1, 7], sweeps n over [1, 4] and m over
// [1, 1]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35707 
// k_gt_8: runs the kernel with m = 1 and n = 4 for every k in [9, 15],
// i.e. k strictly between 8 and 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35722 
// k_gt_8_subtile: for every k in [9, 15], sweeps n over [1, 4] and m over
// [1, 1]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35742 
// k_div_8: runs the kernel with m = 1 and n = 4 for k = 16, 24, ..., 80,
// i.e. multiples of 8 above a single kr-sized step.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35757 
// k_div_8_subtile: for k = 16, 24, ..., 80, sweeps n over [1, 4] and m over
// [1, 1]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35777 
// n_gt_4: runs the kernel with m = 1 for n in [5, 7] (above nr = 4 but below
// 2 * nr), with k taking the values 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35794 
// n_gt_4_strided_cn: same sweep as n_gt_4 (n in [5, 7], k = 1, 10, ..., 37)
// but with cn_stride(7), a stride larger than nr = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35812 
// n_gt_4_subtile: for n in [5, 7] and k = 1, 10, ..., 37, sweeps m over
// [1, 1]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35832 
// n_div_4: runs the kernel with m = 1 for n = 8 and n = 12 (multiples of
// nr = 4), with k taking the values 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35849 
// n_div_4_strided_cn: same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37)
// but with cn_stride(7), a stride larger than nr = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35867 
// n_div_4_subtile: for n = 8, 12 and k = 1, 10, ..., 37, sweeps m over
// [1, 1]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35887 
// small_kernel: runs the kernel with m = 1, n = 4 and ks(3) for
// k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the kernel (tap) count of the indirect GEMM;
// confirm against GemmMicrokernelTester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35903 
// small_kernel_subtile: with ks(3) and k = 1, 10, ..., 37, sweeps n over
// [1, 4] and m over [1, 1]; each combination uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35924 
// n_gt_4_small_kernel: combines ks(3) with n in [5, 7] (above nr = 4) at
// m = 1, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35942 
// n_div_4_small_kernel: combines ks(3) with n = 8 and n = 12 (multiples of
// nr = 4) at m = 1, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35960 
// strided_cm_subtile: with cm_stride(7) (larger than nr = 4) and
// k = 1, 10, ..., 37, sweeps n over [1, 4] and m over [1, 1]; each
// combination uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35981 
// a_offset: runs the kernel with m = 1, n = 4, ks(3) and a_offset(43) for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35998 
// zero: same configuration as a_offset (ks(3), a_offset(43)), additionally
// passing mz to zero_index(); mz only takes the value 0 here because the loop
// bound is 1. Runs for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36018 
// qmin: single run with m = 1, n = 4, k = 8 and qmin(128).
// NOTE(review): presumably exercises the lower output clamp; confirm.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36032 
// qmax: single run with m = 1, n = 4, k = 8 and qmax(128).
// NOTE(review): presumably exercises the upper output clamp; confirm.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36046 
// strided_cm: single run with m = 1, n = 4, k = 8 and cm_stride(7), a stride
// larger than nr = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36060 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
36061 
36062 
36063 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the 2x4c8 AVX (ld128) kernel with m = mr = 2,
// n = nr = 4 and k equal to the configured kr of 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36076 
// strided_cn: single run with m = 2, n = 4, k = 8 and cn_stride(7), a stride
// larger than nr = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36090 
// k_eq_8_subtile: with k = 8, sweeps all (m, n) with m in [1, 2] and n in
// [1, 4]; each combination is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36108 
// k_eq_8_subtile_m: with n = 4 and k = 8, sweeps m over [1, 2], using
// iterations(1) for each value.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36124 
// Sweeps n over 1..4 with m = 2 and k = 8; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(tile_n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36140 
// Sweeps k over 1..7 (all values below kr = 8) with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 1; k_len < 8; ++k_len) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_len)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36155 
// Sweeps k over 1..7, n over 1..4 and m over 1..2 (outer to inner);
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 1; k_len < 8; ++k_len) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 2; ++tile_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36175 
// Sweeps k over 9..15 (above kr = 8, below 16) with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_len)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36190 
// Sweeps k over 9..15, n over 1..4 and m over 1..2 (outer to inner);
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 2; ++tile_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36210 
// Sweeps k over the multiples of 8 from 16 to 80 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_len)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36225 
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..4 and m over 1..2
// (outer to inner); iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 2; ++tile_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36245 
// Sweeps n over 5..7 (above nr = 4) and k over {1, 10, 19, 28, 37} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 5; tile_n < 8; ++tile_n) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(tile_n).k(k_len)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36262 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 5; tile_n < 8; ++tile_n) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(tile_n).k(k_len)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36280 
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2
// (outer to inner); iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 5; tile_n < 8; ++tile_n) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t tile_m = 1; tile_m <= 2; ++tile_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36300 
// Sweeps n over {8, 12} (multiples of nr = 4) and k over {1, 10, 19, 28, 37}
// with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 8; tile_n <= 12; tile_n += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(tile_n).k(k_len)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36317 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 8; tile_n <= 12; tile_n += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(tile_n).k(k_len)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36335 
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2
// (outer to inner); iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 8; tile_n <= 12; tile_n += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t tile_m = 1; tile_m <= 2; ++tile_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36355 
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_len)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36371 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2
// (outer to inner) with ks set to 3; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 2; ++tile_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36392 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 5; tile_n < 8; ++tile_n) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(tile_n).k(k_len)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36410 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 2 and
// ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t tile_n = 8; tile_n <= 12; tile_n += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(tile_n).k(k_len)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36428 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2
// (outer to inner) with cm_stride set to 7; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 2; ++tile_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36449 
// Sweeps k over {1, 10, 19, 28, 37} with m = 2, n = 4, ks set to 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_len)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36466 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..1 with m = 2,
// n = 4, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t zidx = 0; zidx < 2; ++zidx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_len)
        .ks(3)
        .a_offset(83)
        .zero_index(zidx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36486 
// Same shape as k_eq_8 (m = 2, n = 4, k = 8), with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36500 
// Same shape as k_eq_8 (m = 2, n = 4, k = 8), with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36514 
// Same shape as k_eq_8 (m = 2, n = 4, k = 8), with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36528 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
36529 
36530 
36531 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m, n and k are set equal to mr, nr and kr (3, 4, 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36544 
// Same shape as k_eq_8 (m = 3, n = 4, k = 8), with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36558 
// Sweeps n over 1..4 (outer) and m over 1..3 (inner) with k fixed at 8;
// each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
    for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(tile_m).n(tile_n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36576 
// Sweeps m over 1..3 with n = 4 and k = 8; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(tile_m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36592 
// Sweeps n over 1..4 with m = 3 and k = 8; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(tile_n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36608 
// Sweeps k over 1..7 (all values below kr = 8) with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len < 8; ++k_len) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36623 
// Sweeps k over 1..7, n over 1..4 and m over 1..3 (outer to inner);
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len < 8; ++k_len) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36643 
// Sweeps k over 9..15 (above kr = 8, below 16) with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36658 
// Sweeps k over 9..15, n over 1..4 and m over 1..3 (outer to inner);
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 9; k_len < 16; ++k_len) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36678 
// Sweeps k over the multiples of 8 from 16 to 80 with m = 3 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_len)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36693 
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..4 and m over 1..3
// (outer to inner); iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 16; k_len <= 80; k_len += 8) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36713 
// Sweeps n over 5..7 (above nr = 4) and k over {1, 10, 19, 28, 37} with m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 5; tile_n < 8; ++tile_n) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(tile_n).k(k_len)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36730 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 3 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 5; tile_n < 8; ++tile_n) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(tile_n).k(k_len)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36748 
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3
// (outer to inner); iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 5; tile_n < 8; ++tile_n) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36768 
// Sweeps n over {8, 12} (multiples of nr = 4) and k over {1, 10, 19, 28, 37}
// with m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 8; tile_n <= 12; tile_n += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(tile_n).k(k_len)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36785 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 3 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 8; tile_n <= 12; tile_n += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(tile_n).k(k_len)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36803 
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..3
// (outer to inner); iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 8; tile_n <= 12; tile_n += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36823 
// Sweeps k over {1, 10, 19, 28, 37} with m = 3, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_len)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36839 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3
// (outer to inner) with ks set to 3; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36860 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} with m = 3 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 5; tile_n < 8; ++tile_n) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(tile_n).k(k_len)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36878 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} with m = 3 and
// ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t tile_n = 8; tile_n <= 12; tile_n += 4) {
    for (size_t k_len = 1; k_len <= 40; k_len += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(tile_n).k(k_len)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36896 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..3
// (outer to inner) with cm_stride set to 7; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t tile_n = 1; tile_n <= 4; ++tile_n) {
      for (uint32_t tile_m = 1; tile_m <= 3; ++tile_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(tile_m).n(tile_n).k(k_len)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36917 
// Sweeps k over {1, 10, 19, 28, 37} with m = 3, n = 4, ks set to 3 and
// a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_len)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36934 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..2 with m = 3,
// n = 4, ks set to 3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_len = 1; k_len <= 40; k_len += 9) {
    for (uint32_t zidx = 0; zidx < 3; ++zidx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_len)
        .ks(3)
        .a_offset(127)
        .zero_index(zidx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36954 
// Same shape as k_eq_8 (m = 3, n = 4, k = 8), with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36968 
// Same shape as k_eq_8 (m = 3, n = 4, k = 8), with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36982 
// 3x4c8 XOP (ld128) IGEMM, full 3x4 tile at k=8 with cm_stride=7 (larger than nr=4).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36996 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
36997 
36998 
36999 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x8c8 AVX2 IGEMM, full 1x8 tile with k=8 (equal to kr).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37012 
// 1x8c8 AVX2 IGEMM, full 1x8 tile at k=8 with cn_stride=11 (larger than nr=8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37026 
// 1x8c8 AVX2 IGEMM at k=8: every output sub-tile with n in [1, 8] and m in [1, 1],
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37044 
// 1x8c8 AVX2 IGEMM at k=8, n=8: sweeps m over [1, 1] (a single value since mr=1),
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37060 
// 1x8c8 AVX2 IGEMM at k=8, m=1: sweeps n over [1, 8], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37076 
// 1x8c8 AVX2 IGEMM, full 1x8 tile: sweeps k over [1, 7] (all values below kr=8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37091 
// 1x8c8 AVX2 IGEMM: for each k in [1, 7], every sub-tile with n in [1, 8] and m in [1, 1],
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37111 
// 1x8c8 AVX2 IGEMM, full 1x8 tile: sweeps k over [9, 15] (between kr=8 and 2*kr).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37126 
// 1x8c8 AVX2 IGEMM: for each k in [9, 15], every sub-tile with n in [1, 8] and m in [1, 1],
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37146 
// 1x8c8 AVX2 IGEMM, full 1x8 tile: sweeps k over multiples of 8 from 16 to 80 inclusive.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37161 
// 1x8c8 AVX2 IGEMM: for each k in 16, 24, ..., 80, every sub-tile with n in [1, 8]
// and m in [1, 1], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37181 
// 1x8c8 AVX2 IGEMM with m=1: sweeps n over [9, 15] (between nr=8 and 2*nr)
// and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37198 
// 1x8c8 AVX2 IGEMM with m=1 and cn_stride=11: sweeps n over [9, 15]
// and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37216 
// 1x8c8 AVX2 IGEMM: sweeps n over [9, 15], k over 1, 10, 19, 28, 37 and m over [1, 1],
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37236 
// 1x8c8 AVX2 IGEMM with m=1: n takes the multiples of nr 16 and 24,
// k sweeps over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37253 
// 1x8c8 AVX2 IGEMM with m=1 and cn_stride=11: n takes 16 and 24,
// k sweeps over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37271 
// 1x8c8 AVX2 IGEMM: n takes 16 and 24, k sweeps over 1, 10, 19, 28, 37 and m over [1, 1],
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37291 
// 1x8c8 AVX2 IGEMM, full 1x8 tile with ks=3: sweeps k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37307 
// 1x8c8 AVX2 IGEMM with ks=3: for each k in 1, 10, 19, 28, 37, every sub-tile with
// n in [1, 8] and m in [1, 1], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37328 
// 1x8c8 AVX2 IGEMM with m=1 and ks=3: sweeps n over [9, 15] and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37346 
// 1x8c8 AVX2 IGEMM with m=1 and ks=3: n takes 16 and 24, k sweeps over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37364 
// 1x8c8 AVX2 IGEMM with cm_stride=11: for each k in 1, 10, 19, 28, 37, every sub-tile
// with n in [1, 8] and m in [1, 1], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37385 
// 1x8c8 AVX2 IGEMM, full 1x8 tile with ks=3 and a_offset=43: sweeps k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
37402 
// 1x8c8 AVX2 IGEMM, full 1x8 tile with ks=3 and a_offset=43.
// Sweeps k over 1, 10, 19, 28, 37; zero_index only takes the value 0 because mr=1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
37422 
// 1x8c8 AVX2 IGEMM, full 1x8 tile at k=8 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37436 
// 1x8c8 AVX2 IGEMM, full 1x8 tile at k=8 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37450 
// 1x8c8 AVX2 IGEMM, full 1x8 tile at k=8 with cm_stride=11 (larger than nr=8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
37464 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
37465 
37466 
37467 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x16c8 AVX512SKX IGEMM, full 3x16 tile with k=8 (equal to kr).
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
37480 
// 3x16c8 AVX512SKX IGEMM, full 3x16 tile at k=8 with cn_stride=19 (larger than nr=16).
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
37494 
// 3x16c8 AVX512SKX IGEMM at k=8: every output sub-tile with n in [1, 16] and m in [1, 3],
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37512 
// 3x16c8 AVX512SKX IGEMM at k=8, n=16: sweeps m over [1, 3], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37528 
// 3x16c8 AVX512SKX IGEMM at k=8, m=3: sweeps n over [1, 16], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37544 
// 3x16c8 AVX512SKX IGEMM, full 3x16 tile: sweeps k over [1, 7] (all values below kr=8).
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37559 
// 3x16c8 AVX512SKX IGEMM: for each k in [1, 7], every sub-tile with n in [1, 16]
// and m in [1, 3], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37579 
// 3x16c8 AVX512SKX IGEMM, full 3x16 tile: sweeps k over [9, 15] (between kr=8 and 2*kr).
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37594 
// 3x16c8 AVX512SKX IGEMM: for each k in [9, 15], every sub-tile with n in [1, 16]
// and m in [1, 3], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37614 
// 3x16c8 AVX512SKX IGEMM, full 3x16 tile: sweeps k over multiples of 8 from 16 to 80 inclusive.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37629 
// 3x16c8 AVX512SKX IGEMM: for each k in 16, 24, ..., 80, every sub-tile with n in [1, 16]
// and m in [1, 3], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37649 
// 3x16c8 AVX512SKX IGEMM with m=3: sweeps n over [17, 31] (between nr=16 and 2*nr)
// and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37666 
// 3x16c8 AVX512SKX IGEMM with m=3 and cn_stride=19: sweeps n over [17, 31]
// and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37684 
// 3x16c8 AVX512SKX IGEMM: sweeps n over [17, 31], k over 1, 10, 19, 28, 37 and m over [1, 3],
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37704 
// 3x16c8 AVX512SKX IGEMM with m=3: n takes the multiples of nr 32 and 48,
// k sweeps over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37721 
// 3x16c8 AVX512SKX IGEMM with m=3 and cn_stride=19: n takes 32 and 48,
// k sweeps over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37739 
// 3x16c8 AVX512SKX IGEMM: n takes 32 and 48, k sweeps over 1, 10, 19, 28, 37 and
// m over [1, 3], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37759 
// 3x16c8 AVX512SKX IGEMM, full 3x16 tile with ks=3: sweeps k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37775 
// 3x16c8 AVX512SKX IGEMM with ks=3: for each k in 1, 10, 19, 28, 37, every sub-tile with
// n in [1, 16] and m in [1, 3], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37796 
// 3x16c8 AVX512SKX IGEMM with m=3 and ks=3: sweeps n over [17, 31] and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37814 
// 3x16c8 AVX512SKX IGEMM with m=3 and ks=3: n takes 32 and 48, k sweeps over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37832 
// 3x16c8 AVX512SKX IGEMM with cm_stride=19: for each k in 1, 10, 19, 28, 37, every sub-tile
// with n in [1, 16] and m in [1, 3], one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37853 
// For k in {1, 10, 19, 28, 37}, runs the 3x16c8 AVX512SKX IGEMM microkernel
// with m = 3, n = 16, ks = 3 and a_offset = 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(16).k(k)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
              xnn_init_qs8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
  }
}
37870 
// For k in {1, 10, 19, 28, 37} and zero_index mz in 0..2, runs the 3x16c8
// AVX512SKX IGEMM microkernel with m = 3, n = 16, ks = 3 and a_offset = 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(3).n(16).k(k)
          .ks(3)
          .a_offset(127)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
                xnn_init_qs8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37890 
// Single run of the 3x16c8 AVX512SKX IGEMM microkernel with m = 3, n = 16,
// k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
37904 
// Single run of the 3x16c8 AVX512SKX IGEMM microkernel with m = 3, n = 16,
// k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
37918 
// Single run of the 3x16c8 AVX512SKX IGEMM microkernel with m = 3, n = 16,
// k = 8 and cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
}
37932 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
37933 
37934 
37935 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with
// m = 1, n = 4 and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
37947 
// Single run of the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with
// m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
37960 
// For n in 1..4 and m = 1 (single-value loop), runs the 1x4c2 wasmsimd
// dot16x2 ld64 IGEMM microkernel with k = 8 and a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
37977 
// For m = 1 (single-value loop), runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM
// microkernel with n = 4, k = 8 and a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
37992 
// For n in 1..4, runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with
// m = 1, k = 8 and a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38007 
// For k in 1..7, runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with
// m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38021 
// For k in 1..7, n in 1..4 and m = 1 (single-value loop), runs the 1x4c2
// wasmsimd dot16x2 ld64 IGEMM microkernel with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38040 
// For k in 9..15, runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with
// m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38054 
// For k in 9..15, n in 1..4 and m = 1 (single-value loop), runs the 1x4c2
// wasmsimd dot16x2 ld64 IGEMM microkernel with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38073 
// For k in {16, 24, ..., 80}, runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM
// microkernel with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38087 
// For k in {16, 24, ..., 80}, n in 1..4 and m = 1 (single-value loop), runs
// the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38106 
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld64 IGEMM microkernel with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38122 
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld64 IGEMM microkernel with m = 1 and cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38139 
// For n in 5..7, k in {1, 10, 19, 28, 37} and m = 1 (single-value loop), runs
// the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38158 
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld64 IGEMM microkernel with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38174 
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld64 IGEMM microkernel with m = 1 and cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38191 
// For n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1 (single-value loop),
// runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38210 
// For k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM
// microkernel with m = 1, n = 4 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38225 
// For k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1 (single-value loop), runs
// the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with ks = 3 and a single
// iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38245 
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld64 IGEMM microkernel with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38262 
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld64 IGEMM microkernel with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38279 
// For k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1 (single-value loop), runs
// the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with cm_stride = 7 and a
// single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38299 
// For k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM
// microkernel with m = 1, n = 4, ks = 3 and a_offset = 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38315 
// For k in {1, 10, 19, 28, 37} and zero_index mz = 0 (single-value loop),
// runs the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with m = 1, n = 4,
// ks = 3 and a_offset = 43.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(k)
          .ks(3)
          .a_offset(43)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38334 
// Single run of the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with
// m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
38347 
// Single run of the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with
// m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
38360 
// Single run of the 1x4c2 wasmsimd dot16x2 ld64 IGEMM microkernel with
// m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
38373 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38374 
38375 
38376 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the 1x4c2 wasmsimd dot16x2 ld128 IGEMM microkernel with
// m = 1, n = 4 and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
38388 
// Single run of the 1x4c2 wasmsimd dot16x2 ld128 IGEMM microkernel with
// m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
38401 
// For n in 1..4 and m = 1 (single-value loop), runs the 1x4c2 wasmsimd
// dot16x2 ld128 IGEMM microkernel with k = 8 and a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38418 
// For m = 1 (single-value loop), runs the 1x4c2 wasmsimd dot16x2 ld128 IGEMM
// microkernel with n = 4, k = 8 and a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38433 
// For n in 1..4, runs the 1x4c2 wasmsimd dot16x2 ld128 IGEMM microkernel with
// m = 1, k = 8 and a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38448 
// For k in 1..7, runs the 1x4c2 wasmsimd dot16x2 ld128 IGEMM microkernel with
// m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38462 
// For k in 1..7, n in 1..4 and m = 1 (single-value loop), runs the 1x4c2
// wasmsimd dot16x2 ld128 IGEMM microkernel with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38481 
// For k in 9..15, runs the 1x4c2 wasmsimd dot16x2 ld128 IGEMM microkernel
// with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38495 
// For k in 9..15, n in 1..4 and m = 1 (single-value loop), runs the 1x4c2
// wasmsimd dot16x2 ld128 IGEMM microkernel with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38514 
// For k in {16, 24, ..., 80}, runs the 1x4c2 wasmsimd dot16x2 ld128 IGEMM
// microkernel with m = 1 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38528 
// For k in {16, 24, ..., 80}, n in 1..4 and m = 1 (single-value loop), runs
// the 1x4c2 wasmsimd dot16x2 ld128 IGEMM microkernel with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38547 
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld128 IGEMM microkernel with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38563 
// For n in 5..7 and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld128 IGEMM microkernel with m = 1 and cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38580 
// For n in 5..7, k in {1, 10, 19, 28, 37} and m = 1 (single-value loop), runs
// the 1x4c2 wasmsimd dot16x2 ld128 IGEMM microkernel with a single iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38599 
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld128 IGEMM microkernel with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38615 
// For n in {8, 12} and k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd
// dot16x2 ld128 IGEMM microkernel with m = 1 and cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38632 
// For n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1 (single-value loop),
// runs the 1x4c2 wasmsimd dot16x2 ld128 IGEMM microkernel with one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38651 
// For k in {1, 10, 19, 28, 37}, runs the 1x4c2 wasmsimd dot16x2 ld128 IGEMM
// microkernel with m = 1, n = 4 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38666 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // k steps through 1, 10, 19, 28, 37; n covers 1..4; the m loop covers only 1.
  // ks is set to 3 and each configuration runs with iterations(1).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38686 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // n covers 5, 6, 7; k steps through 1, 10, 19, 28, 37; ks is set to 3.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38703 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // n takes the values 8 and 12; k steps through 1, 10, 19, 28, 37; ks is set to 3.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38720 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // k steps through 1, 10, 19, 28, 37; n covers 1..4; the m loop covers only 1.
  // cm_stride is set to 7 and each configuration runs with iterations(1).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38740 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  // k steps through 1, 10, 19, 28, 37 with ks set to 3 and a_offset set to 43.
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38756 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  // k steps through 1, 10, 19, 28, 37; the mz loop covers only 0 and is passed to zero_index.
  // ks is set to 3 and a_offset to 43.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38775 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single configuration: m = 1, n = 4, k = 8, with qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
38788 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single configuration: m = 1, n = 4, k = 8, with qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
38801 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single configuration: m = 1, n = 4, k = 8, with cm_stride set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
38814 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38815 
38816 
38817 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single configuration: m = 1, n = 4, k = 8.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
38829 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single configuration: m = 1, n = 4, k = 8, with cn_stride set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
38842 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k is fixed at 8; n covers 1..4; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38859 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // n = 4 and k = 8 are fixed; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38874 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // m = 1 and k = 8 are fixed; n covers 1..4.
  // Each configuration runs with iterations(1).
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38889 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // k covers 1..7 with m = 1 and n = 4.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38903 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k covers 1..7; n covers 1..4; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38922 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // k covers 9..15 with m = 1 and n = 4.
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38936 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k covers 9..15; n covers 1..4; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38955 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // k covers the multiples of 8 from 16 through 80, with m = 1 and n = 4.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38969 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k covers the multiples of 8 from 16 through 80; n covers 1..4; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38988 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n covers 5, 6, 7; k steps through 1, 10, 19, 28, 37; m = 1.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39004 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n covers 5, 6, 7; k steps through 1, 10, 19, 28, 37; cn_stride is set to 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39021 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n covers 5, 6, 7; k steps through 1, 10, 19, 28, 37; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39040 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n takes the values 8 and 12; k steps through 1, 10, 19, 28, 37; m = 1.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39056 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // n takes the values 8 and 12; k steps through 1, 10, 19, 28, 37.
  // Every configuration sets cn_stride to 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39073 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n takes the values 8 and 12; k steps through 1, 10, 19, 28, 37.
  // The m loop covers the single value 1; each configuration runs with iterations(1).
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39092 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // k steps through 1, 10, 19, 28, 37 with m = 1, n = 4 and ks set to 3.
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39107 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // k steps through 1, 10, 19, 28, 37; n covers 1..4; the m loop covers only 1.
  // ks is set to 3 and each configuration runs with iterations(1).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39127 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // n covers 5, 6, 7; k steps through 1, 10, 19, 28, 37; ks is set to 3.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39144 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // n takes the values 8 and 12; k steps through 1, 10, 19, 28, 37; ks is set to 3.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39161 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // k steps through 1, 10, 19, 28, 37; n covers 1..4; the m loop covers only 1.
  // cm_stride is set to 7 and each configuration runs with iterations(1).
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39181 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  // k steps through 1, 10, 19, 28, 37 with ks set to 3 and a_offset set to 43.
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39197 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  // k steps through 1, 10, 19, 28, 37; the mz loop covers only 0 and is passed to zero_index.
  // ks is set to 3 and a_offset to 43.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39216 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single configuration: m = 1, n = 4, k = 8, with qmin set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39229 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single configuration: m = 1, n = 4, k = 8, with qmax set to 128.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39242 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single configuration: m = 1, n = 4, k = 8, with cm_stride set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39255 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39256 
39257 
39258 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single configuration: m = 1, n = 4, k = 8.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39270 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single configuration: m = 1, n = 4, k = 8, with cn_stride set to 7.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39283 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k is fixed at 8; n covers 1..4; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39300 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // n = 4 and k = 8 are fixed; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39315 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // m = 1 and k = 8 are fixed; n covers 1..4.
  // Each configuration runs with iterations(1).
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39330 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // k covers 1..7 with m = 1 and n = 4.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39344 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k covers 1..7; n covers 1..4; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39363 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // k covers 9..15 with m = 1 and n = 4.
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39377 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k covers 9..15; n covers 1..4; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39396 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // k covers the multiples of 8 from 16 through 80, with m = 1 and n = 4.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39410 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k covers the multiples of 8 from 16 through 80; n covers 1..4; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39429 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n covers 5, 6, 7; k steps through 1, 10, 19, 28, 37; m = 1.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39445 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n covers 5, 6, 7; k steps through 1, 10, 19, 28, 37; cn_stride is set to 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39462 
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n covers 5, 6, 7; k steps through 1, 10, 19, 28, 37; the m loop covers only 1.
  // Each configuration runs with iterations(1).
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39481 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: n = 8 and 12 (multiples of nr = 4) at m = 1,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39497 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: n = 8 and 12 at m = 1 with cn_stride set to 7,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39514 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: every (n, k, m) with n = 8 and 12,
// k in 1, 10, 19, 28, 37 and m = 1..mr (mr = 1); one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39533 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: full 1x4 tile with ks set to 3,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39548 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: ks set to 3 for every (k, n, m) with
// k in 1, 10, 19, 28, 37, n = 1..4 and m = 1..mr (mr = 1); one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39568 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: n = 5..7 at m = 1 with ks set to 3,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39585 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: n = 8 and 12 at m = 1 with ks set to 3,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39602 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: cm_stride set to 7 for every (k, n, m) with
// k in 1, 10, 19, 28, 37, n = 1..4 and m = 1..mr (mr = 1); one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39622 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: full 1x4 tile with ks set to 3 and a_offset set to 43,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39638 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: same setup as the a_offset case (ks = 3, a_offset = 43),
// additionally setting zero_index to each mz in [0, mr) (mr = 1, so only mz = 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39657 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: full 1x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39670 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: full 1x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39683 
// 1x4c8 wasmsimd dot16x2 ld128 kernel: full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39696 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39697 
39698 
39699 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39711 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39724 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: k = 8 for every (n, m) with n = 1..4 and m = 1..2;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39741 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: k = 8, n = 4, with m = 1..2;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39756 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: k = 8, m = 2, with n = 1..4;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39771 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile for k = 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39785 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: every (k, n, m) with k = 1..7, n = 1..4 and m = 1..2;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39804 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile for k = 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39818 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: every (k, n, m) with k = 9..15, n = 1..4 and m = 1..2;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39837 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39851 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: every (k, n, m) with k = 16, 24, ..., 80,
// n = 1..4 and m = 1..2; one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39870 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: n = 5..7 (greater than nr = 4) at m = 2,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39886 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: n = 5..7 at m = 2 with cn_stride set to 7,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39903 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: every (n, k, m) with n = 5..7,
// k in 1, 10, 19, 28, 37 and m = 1..2; one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39922 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: n = 8 and 12 (multiples of nr = 4) at m = 2,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39938 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: n = 8 and 12 at m = 2 with cn_stride set to 7,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39955 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: every (n, k, m) with n = 8 and 12,
// k in 1, 10, 19, 28, 37 and m = 1..2; one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39974 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile with ks set to 3,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39989 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: ks set to 3 for every (k, n, m) with
// k in 1, 10, 19, 28, 37, n = 1..4 and m = 1..2; one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
40009 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: n = 5..7 at m = 2 with ks set to 3,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
40026 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: n = 8 and 12 at m = 2 with ks set to 3,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
40043 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: cm_stride set to 7 for every (k, n, m) with
// k in 1, 10, 19, 28, 37, n = 1..4 and m = 1..2; one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
40063 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile with ks set to 3 and a_offset set to 83,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
40079 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: same setup as the a_offset case (ks = 3, a_offset = 83),
// additionally setting zero_index to each mz in [0, mr), i.e. mz = 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
40098 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
40111 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
40124 
// 2x4c2 wasmsimd dot16x2 ld128 kernel: full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
40137 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40138 
40139 
40140 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 2x4c8 wasmsimd dot16x2 ld64 kernel: full 2x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
40152 
// 2x4c8 wasmsimd dot16x2 ld64 kernel: full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
40165 
// 2x4c8 wasmsimd dot16x2 ld64 kernel: k = 8 for every (n, m) with n = 1..4 and m = 1..2;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
40182 
// 2x4c8 wasmsimd dot16x2 ld64 kernel: k = 8, n = 4, with m = 1..2;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
40197 
// 2x4c8 wasmsimd dot16x2 ld64 kernel: k = 8, m = 2, with n = 1..4;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
40212 
// 2x4c8 wasmsimd dot16x2 ld64 kernel: full 2x4 tile for k = 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
40226 
// 2x4c8 wasmsimd dot16x2 ld64 kernel: every (k, n, m) with k = 1..7, n = 1..4 and m = 1..2;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
40245 
// 2x4c8 wasmsimd dot16x2 ld64 kernel: full 2x4 tile for k = 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
40259 
// 2x4c8 wasmsimd dot16x2 ld64 kernel: every (k, n, m) with k = 9..15, n = 1..4 and m = 1..2;
// one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
40278 
// Runs the m=2, n=4 shape for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40292 
// Sweeps k = 16, 24, ..., 80 and, for each k, every output shape with
// n in 1..4 and m in 1..2; each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40311 
// Runs m=2 with n in 5..7 (above nr=4) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40327 
// Same sweep as n in 5..7 and k = 1, 10, 19, 28, 37 with m=2, but with
// cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40344 
// For n in 5..7 and k = 1, 10, 19, 28, 37, runs every m in 1..2 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40363 
// Runs m=2 with n = 8 and 12 (multiples of 4) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40379 
// Runs m=2 with n = 8 and 12 for k = 1, 10, 19, 28, 37, with cn_stride(7)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40396 
// For n = 8 and 12 and k = 1, 10, 19, 28, 37, runs every m in 1..2 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40415 
// Runs the m=2, n=4 shape with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40430 
// With ks(3), sweeps k = 1, 10, 19, 28, 37 and every output shape with
// n in 1..4 and m in 1..2; each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40450 
// With ks(3) and m=2, runs n in 5..7 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40467 
// With ks(3) and m=2, runs n = 8 and 12 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40484 
// With cm_stride(7), sweeps k = 1, 10, 19, 28, 37 and every output shape
// with n in 1..4 and m in 1..2; each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40504 
// Runs the m=2, n=4 shape with ks(3) and a_offset(83) for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40520 
// Runs the m=2, n=4 shape with ks(3) and a_offset(83) for
// k = 1, 10, 19, 28, 37, once for each zero_index value 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40539 
// Runs the m=2, n=4, k=8 shape with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40552 
// Runs the m=2, n=4, k=8 shape with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40565 
// Runs the m=2, n=4, k=8 shape with cm_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40578 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40579 
40580 
40581 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the m=2, n=4 shape with k equal to 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40593 
// Runs the m=2, n=4, k=8 shape with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40606 
// With k=8, runs every output shape with n in 1..4 and m in 1..2; each
// configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40623 
// With k=8 and n=4, runs every m in 1..2 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40638 
// With k=8 and m=2, runs every n in 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40653 
// Runs the m=2, n=4 shape for each k in 1..7 (below 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40667 
// Sweeps k over 1..7 (below 8) and, for each k, every output shape with
// n in 1..4 and m in 1..2; each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40686 
// Runs the m=2, n=4 shape for each k in 9..15 (strictly between 8 and 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40700 
// Sweeps k over 9..15 and, for each k, every output shape with n in 1..4
// and m in 1..2; each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40719 
// Runs the m=2, n=4 shape for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40733 
// Sweeps k = 16, 24, ..., 80 and, for each k, every output shape with
// n in 1..4 and m in 1..2; each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40752 
// Runs m=2 with n in 5..7 (above nr=4) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40768 
// Runs m=2 with n in 5..7 for k = 1, 10, 19, 28, 37, with cn_stride(7)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40785 
// For n in 5..7 and k = 1, 10, 19, 28, 37, runs every m in 1..2 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40804 
// Runs m=2 with n = 8 and 12 (multiples of 4) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40820 
// Runs m=2 with n = 8 and 12 for k = 1, 10, 19, 28, 37, with cn_stride(7)
// set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40837 
// For n = 8 and 12 and k = 1, 10, 19, 28, 37, runs every m in 1..2 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40856 
// Runs the m=2, n=4 shape with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40871 
// With ks(3), sweeps k = 1, 10, 19, 28, 37 and every output shape with
// n in 1..4 and m in 1..2; each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40891 
// With ks(3) and m=2, runs n in 5..7 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40908 
// With ks(3) and m=2, runs n = 8 and 12 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40925 
// With cm_stride(7), sweeps k = 1, 10, 19, 28, 37 and every output shape
// with n in 1..4 and m in 1..2; each configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40945 
// Runs the m=2, n=4 shape with ks(3) and a_offset(83) for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40961 
// Runs the m=2, n=4 shape with ks(3) and a_offset(83) for
// k = 1, 10, 19, 28, 37, once for each zero_index value 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40980 
// Runs the m=2, n=4, k=8 shape with qmin(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40993 
// Runs the m=2, n=4, k=8 shape with qmax(128) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41006 
// Runs the m=2, n=4, k=8 shape with cm_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41019 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41020 
41021 
41022 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the m=3, n=4 shape with k equal to 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41034 
// Runs the m=3, n=4, k=8 shape with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41047 
// With k=8, runs every output shape with n in 1..4 and m in 1..3; each
// configuration runs with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41064 
// With k=8 and n=4, runs every m in 1..3 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41079 
// k=8, m=3; n varies over [1, 4], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41094 
// Full tile (m=3, n=4); k takes every value in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41108 
// k in [1, 7] crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41127 
// Full tile (m=3, n=4); k takes every value in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41141 
// k in [9, 15] crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41160 
// Full tile (m=3, n=4); k steps through 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41174 
// k in {16, 24, ..., 80} crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41193 
// m=3; n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41209 
// m=3; n in [5, 7] crossed with k in {1, 10, 19, 28, 37}, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41226 
// n in [5, 7], k in {1, 10, 19, 28, 37}, m in [1, 3]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41245 
// m=3; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41261 
// m=3; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41278 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 3]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41297 
// Full tile (m=3, n=4) with ks(3); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41312 
// ks(3); k in {1, 10, 19, 28, 37} crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41332 
// m=3 with ks(3); n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41349 
// m=3 with ks(3); n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41366 
// cm_stride(7); k in {1, 10, 19, 28, 37} crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41386 
// Full tile (m=3, n=4) with ks(3) and a_offset(127); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41402 
// Full tile with ks(3) and a_offset(127); zero_index takes 0, 1, 2 for each k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41421 
// Full tile (m=3, n=4) at k=8, run with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41434 
// Full tile (m=3, n=4) at k=8, run with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41447 
// Full tile (m=3, n=4) at k=8, run with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41460 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41461 
41462 
41463 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full tile (m=3, n=4) at k=8 with otherwise default tester settings.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41475 
// Full tile (m=3, n=4) at k=8, run with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41488 
// k=8; visits every (m, n) with m in [1, 3] and n in [1, 4], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41505 
// k=8, n=4; m varies over [1, 3], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41520 
// k=8, m=3; n varies over [1, 4], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41535 
// Full tile (m=3, n=4); k takes every value in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41549 
// k in [1, 7] crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41568 
// Full tile (m=3, n=4); k takes every value in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41582 
// k in [9, 15] crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41601 
// Full tile (m=3, n=4); k steps through 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41615 
// k in {16, 24, ..., 80} crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41634 
// m=3; n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41650 
// m=3; n in [5, 7] crossed with k in {1, 10, 19, 28, 37}, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41667 
// n in [5, 7], k in {1, 10, 19, 28, 37}, m in [1, 3]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41686 
// m=3; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41702 
// m=3; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41719 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 3]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41738 
// Full tile (m=3, n=4) with ks(3); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41753 
// ks(3); k in {1, 10, 19, 28, 37} crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41773 
// m=3 with ks(3); n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41790 
// m=3 with ks(3); n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41807 
// cm_stride(7); k in {1, 10, 19, 28, 37} crossed with every (m, n), m in [1, 3], n in [1, 4]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41827 
// Full tile (m=3, n=4) with ks(3) and a_offset(127); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41843 
// Full tile with ks(3) and a_offset(127); zero_index takes 0, 1, 2 for each k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41862 
// Full tile (m=3, n=4) at k=8, run with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41875 
// Full tile (m=3, n=4) at k=8, run with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41888 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single run on the full 3x4 tile at k = 8 with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41901 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41902 
41903 
41904 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run on the full 3x4 tile at k = 8.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41916 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run on the full 3x4 tile at k = 8 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41929 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k = 8; sweeps n over 1..4 (outer) and m over 1..3 (inner), one iteration per pair.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41946 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k = 8 and n = 4; sweeps m over 1..3, one iteration per value.
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41961 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k = 8 and m = 3; sweeps n over 1..4, one iteration per value.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41976 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Full 3x4 tile; k takes every value from 1 through 7.
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41990 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k in 1..7, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42009 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Full 3x4 tile; k takes every value from 9 through 15.
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42023 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k in 9..15, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42042 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Full 3x4 tile; k = 16, 24, ..., 80.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42056 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42075 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // m = 3; n in 5..7 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42091 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // m = 3 with cn_stride(7); n in 5..7 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42108 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n in 5..7, then k = 1, 10, ..., 37, then m in 1..3; one iteration per combination.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42127 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // m = 3; n = 8 and 12 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42143 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // m = 3 with cn_stride(7); n = 8 and 12 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42160 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n = 8 and 12, then k = 1, 10, ..., 37, then m in 1..3; one iteration per combination.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42179 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // Full 3x4 tile with ks(3); k = 1, 10, ..., 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42194 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // ks(3); k = 1, 10, ..., 37, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42214 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // m = 3 with ks(3); n in 5..7 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42231 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // m = 3 with ks(3); n = 8 and 12 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42248 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // cm_stride(7); k = 1, 10, ..., 37, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42268 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  // Full 3x4 tile with ks(3) and a_offset(127); k = 1, 10, ..., 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .ks(3).a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42284 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  // Full 3x4 tile with ks(3) and a_offset(127); k = 1, 10, ..., 37 and zero_index = 0, 1, 2.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .ks(3).a_offset(127).zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42303 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single run on the full 3x4 tile at k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42316 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single run on the full 3x4 tile at k = 8 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42329 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single run on the full 3x4 tile at k = 8 with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42342 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42343 
42344 
42345 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run on the full 3x4 tile at k = 8.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42357 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run on the full 3x4 tile at k = 8 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42370 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k = 8; sweeps n over 1..4 (outer) and m over 1..3 (inner), one iteration per pair.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42387 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k = 8 and n = 4; sweeps m over 1..3, one iteration per value.
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42402 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k = 8 and m = 3; sweeps n over 1..4, one iteration per value.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42417 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Full 3x4 tile; k takes every value from 1 through 7.
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42431 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k in 1..7, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42450 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Full 3x4 tile; k takes every value from 9 through 15.
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42464 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k in 9..15, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42483 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Full 3x4 tile; k = 16, 24, ..., 80.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42497 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42516 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // m = 3; n in 5..7 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42532 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // m = 3 with cn_stride(7); n in 5..7 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42549 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n in 5..7, then k = 1, 10, ..., 37, then m in 1..3; one iteration per combination.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42568 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // m = 3; n = 8 and 12 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42584 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // m = 3 with cn_stride(7); n = 8 and 12 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42601 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n = 8 and 12, then k = 1, 10, ..., 37, then m in 1..3; one iteration per combination.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42620 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // Full 3x4 tile with ks(3); k = 1, 10, ..., 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42635 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // ks(3); k = 1, 10, ..., 37, then n in 1..4, then m in 1..3; one iteration per combination.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42655 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // m = 3 with ks(3); n in 5..7 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42672 
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // m = 3 with ks(3); n = 8 and 12 (outer) and k = 1, 10, ..., 37 (inner).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42689 
// 3x4c8 wasmsimd dot16x2 ld128 kernel with cm_stride(7): k in {1, 10, 19, 28, 37},
// every n in [1, 4] and m in [1, 3], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
42709 
// 3x4c8 wasmsimd dot16x2 ld128 kernel with ks(3) and a_offset(127):
// full 3x4 tile, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
42725 
// 3x4c8 wasmsimd dot16x2 ld128 kernel with ks(3), a_offset(127) and each
// zero_index in [0, 3): full 3x4 tile, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
42744 
// 3x4c8 wasmsimd dot16x2 ld128 kernel: full 3x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
42757 
// 3x4c8 wasmsimd dot16x2 ld128 kernel: full 3x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
42770 
// 3x4c8 wasmsimd dot16x2 ld128 kernel: full 3x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
42783 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42784 
42785 
42786 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x4c8 wasmsimd dot16x2 ld64 kernel: full 4x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
42798 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: full 4x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
42811 
// 4x4c8 wasmsimd dot16x2 ld64 kernel at k = 8: every n in [1, 4] and m in [1, 4],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
42828 
// 4x4c8 wasmsimd dot16x2 ld64 kernel at k = 8, n = 4: every m in [1, 4],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
42843 
// 4x4c8 wasmsimd dot16x2 ld64 kernel at k = 8, m = 4: every n in [1, 4],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
42858 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: full 4x4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
42872 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: every k in [1, 7], n in [1, 4] and
// m in [1, 4], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
42891 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: full 4x4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
42905 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: every k in [9, 15], n in [1, 4] and
// m in [1, 4], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
42924 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: full 4x4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
42938 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: k = 16, 24, ..., 80 with every n in [1, 4]
// and m in [1, 4], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
42957 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: n in {5, 6, 7} (above nr = 4),
// k in {1, 10, 19, 28, 37}, full m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
42973 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with cn_stride(7): n in {5, 6, 7},
// k in {1, 10, 19, 28, 37}, full m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
42990 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: n in {5, 6, 7}, k in {1, 10, 19, 28, 37},
// every m in [1, 4], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43009 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: n in {8, 12} (multiples of nr = 4),
// k in {1, 10, 19, 28, 37}, full m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43025 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with cn_stride(7): n in {8, 12},
// k in {1, 10, 19, 28, 37}, full m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43042 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37},
// every m in [1, 4], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43061 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with ks(3): full 4x4 tile,
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43076 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with ks(3): k in {1, 10, 19, 28, 37},
// every n in [1, 4] and m in [1, 4], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43096 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with ks(3): n in {5, 6, 7},
// k in {1, 10, 19, 28, 37}, full m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43113 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with ks(3): n in {8, 12},
// k in {1, 10, 19, 28, 37}, full m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43130 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with cm_stride(7): k in {1, 10, 19, 28, 37},
// every n in [1, 4] and m in [1, 4], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43150 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with ks(3) and a_offset(163):
// full 4x4 tile, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
43166 
// 4x4c8 wasmsimd dot16x2 ld64 kernel with ks(3), a_offset(163) and each
// zero_index in [0, 4): full 4x4 tile, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43185 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: full 4x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43198 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: full 4x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43211 
// 4x4c8 wasmsimd dot16x2 ld64 kernel: full 4x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
43224 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43225 
43226 
43227 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 1x2 wasm fmagic kernel: full 1x2 tile at k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43239 
// 1x2 wasm fmagic kernel: full 1x2 tile at k = 1 with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43252 
// 1x2 wasm fmagic kernel at k = 1: every n in [1, 2] and m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43269 
// 1x2 wasm fmagic kernel at k = 1, n = 2: every m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(m_dim).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43284 
// 1x2 wasm fmagic kernel at k = 1, m = 1: every n in [1, 2],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43299 
// 1x2 wasm fmagic kernel: full 1x2 tile for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43313 
// 1x2 wasm fmagic kernel: every k in [2, 9], n in [1, 2] and m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43332 
// 1x2 wasm fmagic kernel: n = 3 (the only value in [3, 4)), k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43348 
// 1x2 wasm fmagic kernel with cn_stride(5): n = 3, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43365 
// 1x2 wasm fmagic kernel: n = 3, k in {1, 3, 5}, every m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43384 
// 1x2 wasm fmagic kernel: n in {4, 6} (multiples of nr = 2), k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43400 
// 1x2 wasm fmagic kernel with cn_stride(5): n in {4, 6}, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43417 
// 1x2 wasm fmagic kernel: n in {4, 6}, k in {1, 3, 5}, every m in [1, 1],
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43436 
// 1x2 wasm fmagic kernel with ks(3): full 1x2 tile, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_dim)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43451 
// 1x2 wasm fmagic kernel with ks(3): k in {1, 3, 5}, every n in [1, 2] and
// m in [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43471 
// 1x2 wasm fmagic kernel with ks(3): n = 3, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43488 
// 1x2 wasm fmagic kernel with ks(3): n in {4, 6}, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43505 
// 1x2 wasm fmagic kernel with cm_stride(5): k in {1, 3, 5}, every n in [1, 2]
// and m in [1, 1], one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43525 
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, a_offset) {
  // 1x2 wasm_fmagic kernel: full 1x2 tile, k in {1, 3, 5}, ks(3), a_offset(7).
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(cur_k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43541 
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, zero) {
  // 1x2 wasm_fmagic kernel: k in {1, 3, 5}, zero_index 0 only, ks(3), a_offset(7).
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(cur_k)
        .ks(3)
        .a_offset(7)
        .zero_index(cur_mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43560 
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
  // 1x2 wasm_fmagic kernel: single 1x2 tile, k = 1, with qmin(128).
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43573 
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
  // 1x2 wasm_fmagic kernel: single 1x2 tile, k = 1, with qmax(128).
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43586 
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
  // 1x2 wasm_fmagic kernel: single 1x2 tile, k = 1, with cm_stride(5).
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43599 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43600 
43601 
43602 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
  // 1x4 wasm_fmagic kernel: single 1x4 tile with k = 1.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43614 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
  // 1x4 wasm_fmagic kernel: single 1x4 tile, k = 1, with cn_stride(7).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43627 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
  // 1x4 wasm_fmagic kernel: n in 1..4, m = 1, k = 1, one iteration each.
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43644 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  // 1x4 wasm_fmagic kernel: m = 1 (the only value in range), n = 4, k = 1.
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(cur_m).n(4).k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43659 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  // 1x4 wasm_fmagic kernel: n in 1..4, m = 1, k = 1, one iteration each.
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(cur_n).k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43674 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
  // 1x4 wasm_fmagic kernel: full 1x4 tile with k in 2..9.
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(cur_k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43688 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
  // 1x4 wasm_fmagic kernel: k in 2..9, n in 1..4, m = 1, one iteration each.
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43707 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
  // 1x4 wasm_fmagic kernel: n in 5..7, k in {1, 3, 5}, m = 1.
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43723 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  // 1x4 wasm_fmagic kernel: n in 5..7, k in {1, 3, 5}, m = 1, with cn_stride(7).
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43740 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
  // 1x4 wasm_fmagic kernel: n in 5..7, k in {1, 3, 5}, m = 1, one iteration each.
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43759 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
  // 1x4 wasm_fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m = 1.
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43775 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
  // 1x4 wasm_fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m = 1, with cn_stride(7).
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43792 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
  // 1x4 wasm_fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m = 1, one iteration each.
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43811 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel) {
  // 1x4 wasm_fmagic kernel: full 1x4 tile, k in {1, 3, 5}, with ks(3).
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(cur_k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43826 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel_subtile) {
  // 1x4 wasm_fmagic kernel: k in {1, 3, 5}, n in 1..4, m = 1, with ks(3),
  // one iteration each.
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43846 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  // 1x4 wasm_fmagic kernel: n in 5..7, k in {1, 3, 5}, m = 1, with ks(3).
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43863 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_small_kernel) {
  // 1x4 wasm_fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m = 1, with ks(3).
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43880 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
  // 1x4 wasm_fmagic kernel: k in {1, 3, 5}, n in 1..4, m = 1, with cm_stride(7),
  // one iteration each.
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
43900 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, a_offset) {
  // 1x4 wasm_fmagic kernel: full 1x4 tile, k in {1, 3, 5}, ks(3), a_offset(7).
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(cur_k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
43916 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, zero) {
  // 1x4 wasm_fmagic kernel: k in {1, 3, 5}, zero_index 0 only, ks(3), a_offset(7).
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(cur_k)
        .ks(3)
        .a_offset(7)
        .zero_index(cur_mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
43935 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
  // 1x4 wasm_fmagic kernel: single 1x4 tile, k = 1, with qmin(128).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43948 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
  // 1x4 wasm_fmagic kernel: single 1x4 tile, k = 1, with qmax(128).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43961 
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
  // 1x4 wasm_fmagic kernel: single 1x4 tile, k = 1, with cm_stride(7).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43974 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43975 
43976 
43977 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
  // 2x2 wasm_fmagic kernel: single 2x2 tile with k = 1.
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
43989 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
  // 2x2 wasm_fmagic kernel: single 2x2 tile, k = 1, with cn_stride(5).
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44002 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
  // 2x2 wasm_fmagic kernel: n in 1..2, m in 1..2, k = 1, one iteration each.
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44019 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  // 2x2 wasm_fmagic kernel: m in 1..2, n = 2, k = 1, one iteration each.
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(cur_m).n(2).k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44034 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  // 2x2 wasm_fmagic kernel: n in 1..2, m = 2, k = 1, one iteration each.
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(cur_n).k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44049 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
  // 2x2 wasm_fmagic kernel: full 2x2 tile with k in 2..9.
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(cur_k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44063 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
  // 2x2 wasm_fmagic kernel: k in 2..9, n in 1..2, m in 1..2, one iteration each.
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44082 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
  // 2x2 wasm_fmagic kernel: n = 3, k in {1, 3, 5}, m = 2.
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44098 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  // 2x2 wasm_fmagic kernel: n = 3, k in {1, 3, 5}, m = 2, with cn_stride(5).
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(5)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44115 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
  // 2x2 wasm_fmagic kernel: n = 3, k in {1, 3, 5}, m in 1..2, one iteration each.
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44134 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
  // 2x2 wasm_fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m = 2.
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44150 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
  // 2x2 wasm_fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m = 2, with cn_stride(5).
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(5)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44167 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
  // 2x2 wasm_fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..2,
  // one iteration each.
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44186 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel) {
  // 2x2 wasm_fmagic kernel: full 2x2 tile, k in {1, 3, 5}, with ks(3).
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(cur_k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44201 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel_subtile) {
  // 2x2 wasm_fmagic kernel: k in {1, 3, 5}, n in 1..2, m in 1..2, with ks(3),
  // one iteration each.
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44221 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  // 2x2 wasm_fmagic kernel: n = 3, k in {1, 3, 5}, m = 2, with ks(3).
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44238 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_small_kernel) {
  // 2x2 wasm_fmagic kernel: n in {4, 6}, k in {1, 3, 5}, m = 2, with ks(3).
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44255 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
  // 2x2 wasm_fmagic kernel: k in {1, 3, 5}, n in 1..2, m in 1..2,
  // with cm_stride(5), one iteration each.
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44275 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, a_offset) {
  // 2x2 wasm_fmagic kernel: full 2x2 tile, k in {1, 3, 5}, ks(3), a_offset(13).
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(cur_k)
      .ks(3)
      .a_offset(13)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44291 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, zero) {
  // 2x2 wasm_fmagic kernel: k in {1, 3, 5}, zero_index in {0, 1}, ks(3),
  // a_offset(13).
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 2; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(cur_k)
        .ks(3)
        .a_offset(13)
        .zero_index(cur_mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44310 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
  // 2x2 wasm_fmagic kernel: single 2x2 tile, k = 1, with qmin(128).
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44323 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
  // 2x2 wasm_fmagic kernel: single 2x2 tile, k = 1, with qmax(128).
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44336 
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
  // 2x2 wasm_fmagic kernel: single 2x2 tile, k = 1, with cm_stride(5).
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44349 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44350 
44351 
44352 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
  // 2x4 wasm_fmagic kernel: single 2x4 tile with k = 1.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44364 
// Full 2x4 tile at k == 1 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44377 
// Sweeps every (m, n) with n in [1, 4] and m in [1, 2] at k == 1,
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44394 
// Sweeps m over [1, 2] with n == 4 and k == 1, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_size).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44409 
// Sweeps n over [1, 4] with m == 2 and k == 1, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44424 
// Full 2x4 tile for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44438 
// For every k in [2, 9], sweeps n in [1, 4] and m in [1, 2],
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44457 
// n in [5, 7] (above nr == 4) combined with k in {1, 3, 5}, m == 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44473 
// n in [5, 7] combined with k in {1, 3, 5}, m == 2, cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44490 
// n in [5, 7], k in {1, 3, 5} and m in [1, 2], using iterations(1)
// per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44509 
// n in {8, 12} (multiples of nr == 4) combined with k in {1, 3, 5}, m == 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44525 
// n in {8, 12} combined with k in {1, 3, 5}, m == 2, cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44542 
// n in {8, 12}, k in {1, 3, 5} and m in [1, 2], using iterations(1)
// per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44561 
// Full 2x4 tile with ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44576 
// ks set to 3; sweeps k in {1, 3, 5}, n in [1, 4] and m in [1, 2],
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44596 
// ks set to 3; n in [5, 7] combined with k in {1, 3, 5}, m == 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44613 
// ks set to 3; n in {8, 12} combined with k in {1, 3, 5}, m == 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44630 
// cm_stride set to 7; sweeps k in {1, 3, 5}, n in [1, 4] and m in [1, 2],
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44650 
// Full 2x4 tile with ks set to 3 and a_offset set to 13, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(13)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44666 
// Full 2x4 tile with ks set to 3 and a_offset set to 13; sweeps k in
// {1, 3, 5} and zero_index over [0, 1].
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44685 
// Full 2x4 tile at k == 1 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44698 
// Full 2x4 tile at k == 1 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44711 
// Full 2x4 tile at k == 1 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44724 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44725 
44726 
44727 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x2 tile (m=4, n=2) with k fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44739 
// Full 4x2 tile at k == 1 with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44752 
// Sweeps every (m, n) with n in [1, 2] and m in [1, 4] at k == 1,
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44769 
// Sweeps m over [1, 4] with n == 2 and k == 1, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44784 
// Sweeps n over [1, 2] with m == 4 and k == 1, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44799 
// Full 4x2 tile for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44813 
// For every k in [2, 9], sweeps n in [1, 2] and m in [1, 4],
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44832 
// n == 3 (the only value in the loop range, above nr == 2) combined with
// k in {1, 3, 5}, m == 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44848 
// n == 3 combined with k in {1, 3, 5}, m == 4, cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44865 
// n == 3, k in {1, 3, 5} and m in [1, 4], using iterations(1)
// per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44884 
// n in {4, 6} (multiples of nr == 2) combined with k in {1, 3, 5}, m == 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44900 
// n in {4, 6} combined with k in {1, 3, 5}, m == 4, cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44917 
// n in {4, 6}, k in {1, 3, 5} and m in [1, 4], using iterations(1)
// per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44936 
// Full 4x2 tile with ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44951 
// ks set to 3; sweeps k in {1, 3, 5}, n in [1, 2] and m in [1, 4],
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44971 
// ks set to 3; n == 3 combined with k in {1, 3, 5}, m == 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size <= 3; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44988 
// ks set to 3; n in {4, 6} combined with k in {1, 3, 5}, m == 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45005 
// cm_stride set to 5; sweeps k in {1, 3, 5}, n in [1, 2] and m in [1, 4],
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
45025 
// Full 4x2 tile with ks set to 3 and a_offset set to 23, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .ks(3)
        .a_offset(23)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45041 
// Full 4x2 tile with ks set to 3 and a_offset set to 23; sweeps k in
// {1, 3, 5} and zero_index over [0, 3].
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(k_size)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45060 
// Full 4x2 tile at k == 1 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45073 
// Full 4x2 tile at k == 1 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45086 
// Full 4x2 tile at k == 1 with cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45099 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45100 
45101 
45102 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x4 tile (m=4, n=4) with k fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45114 
// Full 4x4 tile at k == 1 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
45127 
// Sweeps every (m, n) with n in [1, 4] and m in [1, 4] at k == 1,
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45144 
// Sweeps m over [1, 4] with n == 4 and k == 1, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_size).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45159 
// Sweeps n over [1, 4] with m == 4 and k == 1, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45174 
// Full 4x4 tile for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
45188 
// For every k in [2, 9], sweeps n in [1, 4] and m in [1, 4],
// using iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
45207 
// n in [5, 7] (above nr == 4) combined with k in {1, 3, 5}, m == 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
45223 
// Same sweep as n_gt_4 (n in 5..7, k in {1, 3, 5}, m = 4) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45240 
// n in 5..7 and k in {1, 3, 5}, with m sweeping 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45259 
// n is a multiple of nr above one tile: n in {8, 12}, k in {1, 3, 5}, m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45275 
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 3, 5}, m = 4) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45292 
// n in {8, 12} and k in {1, 3, 5}, with m sweeping 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45311 
// Full 4x4 tile with ks(3); k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
45326 
// ks(3) with k in {1, 3, 5} over every (m, n) with 1 <= m, n <= 4; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45346 
// n in 5..7, k in {1, 3, 5}, m = 4, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45363 
// n in {8, 12}, k in {1, 3, 5}, m = 4, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45380 
// cm_stride(7) with k in {1, 3, 5} over every (m, n) with 1 <= m, n <= 4; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45400 
// Full 4x4 tile with ks(3) and a_offset(23); k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
45416 
// Same setup as a_offset (ks(3), a_offset(23), k in {1, 3, 5}) with zero_index
// swept over every row index 0..3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45435 
// Full 4x4 tile at k = 1 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
45448 
// Full 4x4 tile at k = 1 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
45461 
// Full 4x4 tile at k = 1 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
45474 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45475 
45476 
// Full 1x2 tile (m = 1, n = 2) at k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
45488 
// Full 1x2 tile at k = 1 with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
45501 
// k == 1 over every (m, n) with m = 1 and n in 1..2; one iteration each.
// The m loop has a single value because mr is 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45518 
// k == 1 with full n = 2 while m sweeps 1..1 (a single value, since mr is 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
45533 
// k == 1 with m = 1 while n sweeps 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
45548 
// Full 1x2 tile with k sweeping 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
45562 
// k sweeps 2..9; for each k, n runs 1..2 and m runs 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45581 
// n exceeds nr without being a multiple of it: the loop covers only n = 3;
// k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45597 
// Same sweep as n_gt_2 (n = 3, k in {1, 3, 5}, m = 1) with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45614 
// n = 3 and k in {1, 3, 5}, with m running 1..1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45633 
// n is a multiple of nr above one tile: n in {4, 6}, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45649 
// Same sweep as n_div_2 (n in {4, 6}, k in {1, 3, 5}, m = 1) with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45666 
// n in {4, 6} and k in {1, 3, 5}, with m running 1..1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45685 
// Full 1x2 tile with ks(3); k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
45700 
// ks(3) with k in {1, 3, 5}, n in 1..2 and m in 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45720 
// n = 3, k in {1, 3, 5}, m = 1, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45737 
// n in {4, 6}, k in {1, 3, 5}, m = 1, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45754 
// cm_stride(5) with k in {1, 3, 5}, n in 1..2 and m in 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45774 
// Full 1x2 tile with ks(3) and a_offset(7); k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
45790 
// Same setup as a_offset (ks(3), a_offset(7), k in {1, 3, 5}) with zero_index
// swept over 0..0 (a single value, since mr is 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(1)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45809 
// Full 1x2 tile at k = 1 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
45822 
// Full 1x2 tile at k = 1 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
45835 
// Full 1x2 tile at k = 1 with cm_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
45848 
45849 
// Full 1x4 tile (m = 1, n = 4) at k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
45861 
// Full 1x4 tile at k = 1 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
45874 
// k == 1 over every (m, n) with m = 1 and n in 1..4; one iteration each.
// The m loop has a single value because mr is 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45891 
// k == 1 with full n = 4 while m sweeps 1..1 (a single value, since mr is 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
45906 
// k == 1 with m = 1 while n sweeps 1..4; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
45921 
// Full 1x4 tile with k sweeping 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
45935 
// k sweeps 2..9; for each k, n runs 1..4 and m runs 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
45954 
// n exceeds nr without being a multiple of it: n in 5..7, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45970 
// Same sweep as n_gt_4 (n in 5..7, k in {1, 3, 5}, m = 1) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
45987 
// n in 5..7 and k in {1, 3, 5}, with m running 1..1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46006 
// n is a multiple of nr above one tile: n in {8, 12}, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
46022 
// Same sweep as n_div_4 (n in {8, 12}, k in {1, 3, 5}, m = 1) with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
46039 
// n in {8, 12} and k in {1, 3, 5}, with m running 1..1; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46058 
// Full 1x4 tile with ks(3); k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
46073 
// ks(3) with k in {1, 3, 5}, n in 1..4 and m in 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46093 
// 1x4 scalar imagic kernel: ks(3) with n = 5, 6, 7 and k = 1, 3, 5, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_arg = 5; n_arg < 8; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_arg).k(k_arg);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46110 
// 1x4 scalar imagic kernel: ks(3) with n = 8 and 12, k = 1, 3, 5, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_arg = 8; n_arg <= 12; n_arg += 4) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_arg).k(k_arg);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46127 
// 1x4 scalar imagic kernel: cm_stride(7) with k = 1, 3, 5, n = 1..4, m = 1,
// and iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 1; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46147 
// 1x4 scalar imagic kernel: full 1x4 tile with ks(3) and a_offset(7),
// k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(k_arg);
    tester.ks(3).a_offset(7);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46163 
// 1x4 scalar imagic kernel: ks(3) and a_offset(7) with k = 1, 3, 5 and
// zero_index set to 0 (the only value below 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(4).k(k_arg);
      tester.ks(3).a_offset(7).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46182 
// 1x4 scalar imagic kernel: full 1x4 tile, k = 1, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46195 
// 1x4 scalar imagic kernel: full 1x4 tile, k = 1, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46208 
// 1x4 scalar imagic kernel: full 1x4 tile, k = 1, with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46221 
46222 
// 2x2 scalar imagic kernel: full 2x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46234 
// 2x2 scalar imagic kernel: full 2x2 tile, k = 1, with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46247 
// 2x2 scalar imagic kernel: k = 1 with n = 1..2 and m = 1..2, and
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_arg = 1; n_arg <= 2; ++n_arg) {
    for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(m_arg).n(n_arg).k(1);
      tester.iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46264 
// 2x2 scalar imagic kernel: k = 1, n = 2, m = 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(m_arg).n(2).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46279 
// 2x2 scalar imagic kernel: k = 1, m = 2, n = 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_arg = 1; n_arg <= 2; ++n_arg) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(n_arg).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46294 
// 2x2 scalar imagic kernel: full 2x2 tile with k = 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(k_arg);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46308 
// 2x2 scalar imagic kernel: k = 2..9 with n = 1..2 and m = 1..2, and
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    for (uint32_t n_arg = 1; n_arg <= 2; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46327 
// 2x2 scalar imagic kernel: n = 3 (the only value in [3, 4)), k = 1, 3, 5,
// m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n_arg = 3; n_arg < 4; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46343 
// 2x2 scalar imagic kernel: cn_stride(5) with n = 3, k = 1, 3, 5, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_arg = 3; n_arg < 4; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.cn_stride(5);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46360 
// 2x2 scalar imagic kernel: n = 3, k = 1, 3, 5, m = 1..2, with
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n_arg = 3; n_arg < 4; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46379 
// 2x2 scalar imagic kernel: n = 4 and 6, k = 1, 3, 5, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n_arg = 4; n_arg <= 6; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46395 
// 2x2 scalar imagic kernel: cn_stride(5) with n = 4 and 6, k = 1, 3, 5,
// m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_arg = 4; n_arg <= 6; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.cn_stride(5);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46412 
// 2x2 scalar imagic kernel: n = 4 and 6, k = 1, 3, 5, m = 1..2, with
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n_arg = 4; n_arg <= 6; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46431 
// 2x2 scalar imagic kernel: full 2x2 tile with ks(3), k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(k_arg);
    tester.ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46446 
// 2x2 scalar imagic kernel: ks(3) with k = 1, 3, 5, n = 1..2, m = 1..2, and
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    for (uint32_t n_arg = 1; n_arg <= 2; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46466 
// 2x2 scalar imagic kernel: ks(3) with n = 3, k = 1, 3, 5, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_arg = 3; n_arg < 4; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46483 
// 2x2 scalar imagic kernel: ks(3) with n = 4 and 6, k = 1, 3, 5, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_arg = 4; n_arg <= 6; n_arg += 2) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46500 
// 2x2 scalar imagic kernel: cm_stride(5) with k = 1, 3, 5, n = 1..2,
// m = 1..2, and iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    for (uint32_t n_arg = 1; n_arg <= 2; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.cm_stride(5).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46520 
// 2x2 scalar imagic kernel: full 2x2 tile with ks(3) and a_offset(13),
// k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(k_arg);
    tester.ks(3).a_offset(13);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46536 
// 2x2 scalar imagic kernel: ks(3) and a_offset(13) with k = 1, 3, 5 and
// zero_index = 0, 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(2).k(k_arg);
      tester.ks(3).a_offset(13).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46555 
// 2x2 scalar imagic kernel: full 2x2 tile, k = 1, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46568 
// 2x2 scalar imagic kernel: full 2x2 tile, k = 1, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46581 
// 2x2 scalar imagic kernel: full 2x2 tile, k = 1, with cm_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46594 
46595 
// 2x4 scalar imagic kernel: full 2x4 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46607 
// 2x4 scalar imagic kernel: full 2x4 tile, k = 1, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46620 
// 2x4 scalar imagic kernel: k = 1 with n = 1..4 and m = 1..2, and
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
    for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(m_arg).n(n_arg).k(1);
      tester.iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46637 
// 2x4 scalar imagic kernel: k = 1, n = 4, m = 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(m_arg).n(4).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46652 
// 2x4 scalar imagic kernel: k = 1, m = 2, n = 1..4, with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(n_arg).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46667 
// 2x4 scalar imagic kernel: full 2x4 tile with k = 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_arg);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46681 
// 2x4 scalar imagic kernel: k = 2..9 with n = 1..4 and m = 1..2, and
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_arg = 2; k_arg < 10; ++k_arg) {
    for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46700 
// 2x4 scalar imagic kernel: n = 5, 6, 7 with k = 1, 3, 5, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n_arg = 5; n_arg < 8; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46716 
// 2x4 scalar imagic kernel: cn_stride(7) with n = 5, 6, 7, k = 1, 3, 5,
// m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_arg = 5; n_arg < 8; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46733 
// 2x4 scalar imagic kernel: n = 5, 6, 7, k = 1, 3, 5, m = 1..2, with
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n_arg = 5; n_arg < 8; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46752 
// 2x4 scalar imagic kernel: n = 8 and 12, k = 1, 3, 5, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n_arg = 8; n_arg <= 12; n_arg += 4) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46768 
// 2x4 scalar imagic kernel: cn_stride(7) with n = 8 and 12, k = 1, 3, 5,
// m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_arg = 8; n_arg <= 12; n_arg += 4) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46785 
// 2x4 scalar imagic kernel: n = 8 and 12, k = 1, 3, 5, m = 1..2, with
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n_arg = 8; n_arg <= 12; n_arg += 4) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46804 
// 2x4 scalar imagic kernel: full 2x4 tile with ks(3), k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_arg);
    tester.ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46819 
// 2x4 scalar imagic kernel: ks(3) with k = 1, 3, 5, n = 1..4, m = 1..2, and
// iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46839 
// 2x4 scalar imagic kernel: ks(3) with n = 5, 6, 7, k = 1, 3, 5, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_arg = 5; n_arg < 8; ++n_arg) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46856 
// 2x4 scalar imagic kernel: ks(3) with n = 8 and 12, k = 1, 3, 5, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_arg = 8; n_arg <= 12; n_arg += 4) {
    for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_arg).k(k_arg);
      tester.ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46873 
// 2x4 scalar imagic kernel: cm_stride(7) with k = 1, 3, 5, n = 1..4,
// m = 1..2, and iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    for (uint32_t n_arg = 1; n_arg <= 4; ++n_arg) {
      for (uint32_t m_arg = 1; m_arg <= 2; ++m_arg) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_arg).n(n_arg).k(k_arg);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
46893 
// 2x4 scalar imagic kernel: full 2x4 tile with ks(3) and a_offset(13),
// k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_arg);
    tester.ks(3).a_offset(13);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
46909 
// 2x4 scalar imagic kernel: ks(3) and a_offset(13) with k = 1, 3, 5 and
// zero_index = 0, 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
  for (size_t k_arg = 1; k_arg <= 5; k_arg += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(4).k(k_arg);
      tester.ks(3).a_offset(13).zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
46928 
// 2x4 scalar imagic kernel: full 2x4 tile, k = 1, with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46941 
// 2x4 scalar imagic kernel: full 2x4 tile, k = 1, with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46954 
// 2x4 scalar imagic kernel: full 2x4 tile, k = 1, with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
46967 
46968 
// 3x2 scalar fmagic kernel: full 3x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
    xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
46980 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cn) {
  // Full 3x2 tile at k = 1 with cn_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
46993 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  // Sweeps n in [1, 2] and m in [1, 3] at k = 1, with iterations set to 1.
  for (uint32_t n = 1; n <= 2; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(m).n(n).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47010 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // Sweeps m in [1, 3] with n = 2 and k = 1, with iterations set to 1.
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(m).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
47025 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // Sweeps n in [1, 2] with m = 3 and k = 1, with iterations set to 1.
  for (uint32_t n = 1; n <= 2; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(n).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
47040 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1) {
  // Full 3x2 tile for every k in [2, 9].
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
47054 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Sweeps k in [2, 9], n in [1, 2] and m in [1, 3], with iterations set to 1.
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47073 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2) {
  // n = 3 (the only value in [3, 4)) with m = 3, for k in {1, 3, 5}.
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47089 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  // n = 3 with m = 3 and cn_stride set to 5, for k in {1, 3, 5}.
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47106 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  // n = 3, k in {1, 3, 5} and m in [1, 3], with iterations set to 1.
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47125 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2) {
  // n in {4, 6} with m = 3, for k in {1, 3, 5}.
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47141 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  // n in {4, 6} with m = 3 and cn_stride set to 5, for k in {1, 3, 5}.
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47158 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5} and m in [1, 3], with iterations set to 1.
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47177 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel) {
  // Full 3x2 tile with ks set to 3, for k in {1, 3, 5}.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
47192 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel_subtile) {
  // k in {1, 3, 5}, n in [1, 2] and m in [1, 3], with ks = 3 and iterations = 1.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47212 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  // n = 3 with m = 3 and ks set to 3, for k in {1, 3, 5}.
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47229 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  // n in {4, 6} with m = 3 and ks set to 3, for k in {1, 3, 5}.
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47246 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm_subtile) {
  // k in {1, 3, 5}, n in [1, 2] and m in [1, 3], with cm_stride = 5 and iterations = 1.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(5);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47266 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, a_offset) {
  // Full 3x2 tile with ks = 3 and a_offset = 17, for k in {1, 3, 5}.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k);
    tester.ks(3).a_offset(17);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
47282 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, zero) {
  // Full 3x2 tile with ks = 3 and a_offset = 17; zero_index takes each value
  // in [0, 2] for k in {1, 3, 5}.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(2).k(k);
      tester.ks(3).a_offset(17).zero_index(mz);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47301 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmin) {
  // Full 3x2 tile at k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47314 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmax) {
  // Full 3x2 tile at k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47327 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm) {
  // Full 3x2 tile at k = 1 with cm_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47340 
47341 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
  // Baseline case: full 3x2 tile with k = 1.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
47353 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
  // Full 3x2 tile at k = 1 with cn_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
47366 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
  // Sweeps n in [1, 2] and m in [1, 3] at k = 1, with iterations set to 1.
  for (uint32_t n = 1; n <= 2; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(m).n(n).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47383 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  // Sweeps m in [1, 3] with n = 2 and k = 1, with iterations set to 1.
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(m).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
47398 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  // Sweeps n in [1, 2] with m = 3 and k = 1, with iterations set to 1.
  for (uint32_t n = 1; n <= 2; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(n).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
47413 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
  // Full 3x2 tile for every k in [2, 9].
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
47427 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
  // Sweeps k in [2, 9], n in [1, 2] and m in [1, 3], with iterations set to 1.
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47446 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
  // n = 3 (the only value in [3, 4)) with m = 3, for k in {1, 3, 5}.
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47462 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  // n = 3 with m = 3 and cn_stride set to 5, for k in {1, 3, 5}.
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47479 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
  // n = 3, k in {1, 3, 5} and m in [1, 3], with iterations set to 1.
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47498 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
  // n in {4, 6} with m = 3, for k in {1, 3, 5}.
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47514 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  // n in {4, 6} with m = 3 and cn_stride set to 5, for k in {1, 3, 5}.
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47531 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5} and m in [1, 3], with iterations set to 1.
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47550 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
  // Full 3x2 tile with ks set to 3, for k in {1, 3, 5}.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
47565 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
  // k in {1, 3, 5}, n in [1, 2] and m in [1, 3], with ks = 3 and iterations = 1.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47585 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  // n = 3 with m = 3 and ks set to 3, for k in {1, 3, 5}.
  for (uint32_t n = 3; n < 4; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47602 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  // n in {4, 6} with m = 3 and ks set to 3, for k in {1, 3, 5}.
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47619 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
  // k in {1, 3, 5}, n in [1, 2] and m in [1, 3], with cm_stride = 5 and iterations = 1.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(5);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47639 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
  // Full 3x2 tile with ks = 3 and a_offset = 17, for k in {1, 3, 5}.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k);
    tester.ks(3).a_offset(17);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
47655 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
  // Full 3x2 tile with ks = 3 and a_offset = 17; zero_index takes each value
  // in [0, 2] for k in {1, 3, 5}.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(2).k(k);
      tester.ks(3).a_offset(17).zero_index(mz);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47674 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
  // Full 3x2 tile at k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
47687 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
  // Full 3x2 tile at k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
47700 
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
  // Full 3x2 tile at k = 1 with cm_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
47713 
47714 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1) {
  // Baseline case: full 3x4 tile with k = 1.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47726 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cn) {
  // Full 3x4 tile at k = 1 with cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
47739 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  // Sweeps n in [1, 4] and m in [1, 3] at k = 1, with iterations set to 1.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(m).n(n).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47756 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // Sweeps m in [1, 3] with n = 4 and k = 1, with iterations set to 1.
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(m).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
47771 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // Sweeps n in [1, 4] with m = 3 and k = 1, with iterations set to 1.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(n).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
47786 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1) {
  // Full 3x4 tile for every k in [2, 9].
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
47800 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Sweeps k in [2, 9], n in [1, 4] and m in [1, 3], with iterations set to 1.
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47819 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4) {
  // n in [5, 7] with m = 3, for k in {1, 3, 5}.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47835 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  // n in [5, 7] with m = 3 and cn_stride set to 7, for k in {1, 3, 5}.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
47852 
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  // n in [5, 7], k in {1, 3, 5} and m in [1, 3], with iterations set to 1.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
47871 
// 3x4 scalar-fmagic kernel: sweeps n = 8 and 12 (multiples of nr = 4)
// and k = 1, 3, 5 with the full m = 3 rows.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
47887 
// 3x4 scalar-fmagic kernel: sweeps n = 8 and 12 and k = 1, 3, 5 at m = 3,
// with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
47904 
// 3x4 scalar-fmagic kernel: sweeps n = 8 and 12, k = 1, 3, 5 and every
// m = 1..3, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
47923 
// 3x4 scalar-fmagic kernel: full 3x4 tile with ks(3), sweeping k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
47938 
// 3x4 scalar-fmagic kernel: ks(3) with k = 1, 3, 5 over every sub-tile
// (n = 1..4, m = 1..3), with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
47958 
// 3x4 scalar-fmagic kernel: ks(3) with n = 5..7 and k = 1, 3, 5 at m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
47975 
// 3x4 scalar-fmagic kernel: ks(3) with n = 8 and 12 and k = 1, 3, 5 at m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
47992 
// 3x4 scalar-fmagic kernel: cm_stride(7) with k = 1, 3, 5 over every
// sub-tile (n = 1..4, m = 1..3), with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48012 
// 3x4 scalar-fmagic kernel: full 3x4 tile with ks(3) and a_offset(17),
// sweeping k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
48028 
// 3x4 scalar-fmagic kernel: full 3x4 tile with ks(3) and a_offset(17),
// sweeping k = 1, 3, 5 and zero_index(mz) for mz = 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48047 
// 3x4 scalar-fmagic kernel: single full-tile run (m = 3, n = 4, k = 1)
// with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
48060 
// 3x4 scalar-fmagic kernel: single full-tile run (m = 3, n = 4, k = 1)
// with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
48073 
// 3x4 scalar-fmagic kernel: single full-tile run (m = 3, n = 4, k = 1)
// with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
48086 
48087 
// 3x4 scalar-lrintf kernel: single full-tile run with m = 3, n = 4, k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
48099 
// 3x4 scalar-lrintf kernel: single full-tile run (m = 3, n = 4, k = 1)
// with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
48112 
// 3x4 scalar-lrintf kernel: k = 1 over every sub-tile (n = 1..4, m = 1..3),
// with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48129 
// 3x4 scalar-lrintf kernel: k = 1, n = 4, sweeping m = 1..3 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
48144 
// 3x4 scalar-lrintf kernel: k = 1, m = 3, sweeping n = 1..4 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
48159 
// 3x4 scalar-lrintf kernel: full 3x4 tile, sweeping k = 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
48173 
// 3x4 scalar-lrintf kernel: k = 2..9 over every sub-tile (n = 1..4,
// m = 1..3), with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48192 
// 3x4 scalar-lrintf kernel: sweeps n = 5..7 (just above nr = 4) and
// k = 1, 3, 5 with the full m = 3 rows.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48208 
// 3x4 scalar-lrintf kernel: sweeps n = 5..7 and k = 1, 3, 5 at m = 3,
// with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48225 
// 3x4 scalar-lrintf kernel: sweeps n = 5..7, k = 1, 3, 5 and every
// m = 1..3, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48244 
// 3x4 scalar-lrintf kernel: sweeps n = 8 and 12 (multiples of nr = 4)
// and k = 1, 3, 5 with the full m = 3 rows.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48260 
// 3x4 scalar-lrintf kernel: sweeps n = 8 and 12 and k = 1, 3, 5 at m = 3,
// with cn_stride(7) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48277 
// 3x4 scalar-lrintf kernel: sweeps n = 8 and 12, k = 1, 3, 5 and every
// m = 1..3, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48296 
// 3x4 scalar-lrintf kernel: full 3x4 tile with ks(3), sweeping k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
48311 
// 3x4 scalar-lrintf kernel: ks(3) with k = 1, 3, 5 over every sub-tile
// (n = 1..4, m = 1..3), with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48331 
// 3x4 scalar-lrintf kernel: ks(3) with n = 5..7 and k = 1, 3, 5 at m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48348 
// 3x4 scalar-lrintf kernel: ks(3) with n = 8 and 12 and k = 1, 3, 5 at m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48365 
// 3x4 scalar-lrintf kernel: cm_stride(7) with k = 1, 3, 5 over every
// sub-tile (n = 1..4, m = 1..3), with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48385 
// 3x4 scalar-lrintf kernel: full 3x4 tile with ks(3) and a_offset(17),
// sweeping k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
48401 
// 3x4 scalar-lrintf kernel: full 3x4 tile with ks(3) and a_offset(17),
// sweeping k = 1, 3, 5 and zero_index(mz) for mz = 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48420 
// 3x4 scalar-lrintf kernel: single full-tile run (m = 3, n = 4, k = 1)
// with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
48433 
// 3x4 scalar-lrintf kernel: single full-tile run (m = 3, n = 4, k = 1)
// with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
48446 
// 3x4 scalar-lrintf kernel: single full-tile run (m = 3, n = 4, k = 1)
// with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
48459 
48460 
// 4x2 scalar-fmagic kernel: single full-tile run with m = 4, n = 2, k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
48472 
// 4x2 scalar-fmagic kernel: single full-tile run (m = 4, n = 2, k = 1)
// with cn_stride(5).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
48485 
// 4x2 scalar-fmagic kernel: k = 1 over every sub-tile (n = 1..2, m = 1..4),
// with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48502 
// 4x2 scalar-fmagic kernel: k = 1, n = 2, sweeping m = 1..4 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
48517 
// 4x2 scalar-fmagic kernel: k = 1, m = 4, sweeping n = 1..2 with
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
48532 
// 4x2 scalar-fmagic kernel: full 4x2 tile, sweeping k = 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
48546 
// 4x2 scalar-fmagic kernel: k = 2..9 over every sub-tile (n = 1..2,
// m = 1..4), with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48565 
// 4x2 scalar-fmagic kernel: n = 3 (the only value the n loop covers,
// just above nr = 2) and k = 1, 3, 5 with the full m = 4 rows.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48581 
// 4x2 scalar-fmagic kernel: n = 3 and k = 1, 3, 5 at m = 4, with
// cn_stride(5) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48598 
// 4x2 scalar-fmagic kernel: n = 3, k = 1, 3, 5 and every m = 1..4,
// with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48617 
// 4x2 scalar-fmagic kernel: sweeps n = 4 and 6 (multiples of nr = 2)
// and k = 1, 3, 5 with the full m = 4 rows.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48633 
// 4x2 scalar-fmagic kernel: sweeps n = 4 and 6 and k = 1, 3, 5 at m = 4,
// with cn_stride(5) set on the tester.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48650 
// 4x2 scalar-fmagic kernel: sweeps n = 4 and 6, k = 1, 3, 5 and every
// m = 1..4, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48669 
// 4x2 scalar-fmagic kernel: full 4x2 tile with ks(3), sweeping k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
48684 
// 4x2 scalar-fmagic kernel: ks(3) with k = 1, 3, 5 over every sub-tile
// (n = 1..2, m = 1..4), with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48704 
// 4x2 scalar-fmagic kernel: ks(3) with n = 3 and k = 1, 3, 5 at m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48721 
// 4x2 scalar-fmagic kernel: ks(3) with n = 4 and 6 and k = 1, 3, 5 at m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48738 
// 4x2 scalar-fmagic kernel: cm_stride(5) with k = 1, 3, 5 over every
// sub-tile (n = 1..2, m = 1..4), with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48758 
// 4x2 scalar fmagic kernel: m = 4, n = 2, k in {1, 3, 5}, ks set to 3 and
// a_offset set to 23.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
48774 
// 4x2 scalar fmagic kernel: m = 4, n = 2, k in {1, 3, 5}, ks = 3, a_offset = 23,
// repeated with zero_index set to each of 0..3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
48793 
// 4x2 scalar fmagic kernel: single run with m = 4, n = 2, k = 1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
48806 
// 4x2 scalar fmagic kernel: single run with m = 4, n = 2, k = 1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
48819 
// 4x2 scalar fmagic kernel: single run with m = 4, n = 2, k = 1 and cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
48832 
48833 
// 4x2 scalar lrintf kernel: single run with m = 4, n = 2, k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
48845 
// 4x2 scalar lrintf kernel: single run with m = 4, n = 2, k = 1 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
48858 
// 4x2 scalar lrintf kernel: every (m, n) with m in [1, 4] and n in [1, 2] at k = 1,
// iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48875 
// 4x2 scalar lrintf kernel: m swept over [1, 4] with n = 2 and k = 1, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
48890 
// 4x2 scalar lrintf kernel: n swept over [1, 2] with m = 4 and k = 1, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
48905 
// 4x2 scalar lrintf kernel: m = 4, n = 2, k swept over [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
48919 
// 4x2 scalar lrintf kernel: k in [2, 9] crossed with every (m, n), m in [1, 4] and
// n in [1, 2], iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48938 
// 4x2 scalar lrintf kernel: n = 3 (the only value in [3, 4)), k in {1, 3, 5}, m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48954 
// 4x2 scalar lrintf kernel: n = 3 (the only value in [3, 4)), k in {1, 3, 5}, m = 4,
// with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
48971 
// 4x2 scalar lrintf kernel: n = 3 (the only value in [3, 4)), k in {1, 3, 5},
// m swept over [1, 4], iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
48990 
// 4x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
49006 
// 4x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m = 4, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
49023 
// 4x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m swept over [1, 4],
// iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
49042 
// 4x2 scalar lrintf kernel: m = 4, n = 2, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
49057 
// 4x2 scalar lrintf kernel: k in {1, 3, 5} crossed with every (m, n), m in [1, 4] and
// n in [1, 2], ks set to 3, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
49077 
// 4x2 scalar lrintf kernel: n = 3 (the only value in [3, 4)), k in {1, 3, 5},
// m = 4, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
49094 
// 4x2 scalar lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m = 4, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
49111 
// 4x2 scalar lrintf kernel: every (m, n) with m in [1, 4] and n in [1, 2],
// k in {1, 3, 5}, cm_stride set to 5, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
49131 
// 4x2 scalar lrintf kernel: m = 4, n = 2, k in {1, 3, 5}, ks set to 3 and
// a_offset set to 23.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
49147 
// 4x2 scalar lrintf kernel: m = 4, n = 2, k in {1, 3, 5}, ks = 3, a_offset = 23,
// repeated with zero_index set to each of 0..3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
49166 
// 4x2 scalar lrintf kernel: single run with m = 4, n = 2, k = 1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
49179 
// 4x2 scalar lrintf kernel: single run with m = 4, n = 2, k = 1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
49192 
// 4x2 scalar lrintf kernel: single run with m = 4, n = 2, k = 1 and cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
49205 
49206 
// 4x4 scalar fmagic kernel: single run with m = 4, n = 4, k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
49218 
// 4x4 scalar fmagic kernel: single run with m = 4, n = 4, k = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
49231 
// 4x4 scalar fmagic kernel: every (m, n) with m in [1, 4] and n in [1, 4] at k = 1,
// iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
49248 
// 4x4 scalar fmagic kernel: m swept over [1, 4] with n = 4 and k = 1, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
49263 
// 4x4 scalar fmagic kernel: n swept over [1, 4] with m = 4 and k = 1, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
49278 
// 4x4 scalar fmagic kernel: m = 4, n = 4, k swept over [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
49292 
// 4x4 scalar fmagic kernel: k in [2, 9] crossed with every (m, n), m in [1, 4] and
// n in [1, 4], iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
49311 
// 4x4 scalar fmagic kernel: n in {5, 6, 7}, k in {1, 3, 5}, m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
49327 
// 4x4 scalar fmagic kernel: n in {5, 6, 7}, k in {1, 3, 5}, m = 4, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
49344 
// 4x4 scalar fmagic kernel: n in {5, 6, 7}, k in {1, 3, 5}, m swept over [1, 4],
// iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
49363 
// 4x4 scalar fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
49379 
// 4x4 scalar fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m = 4, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
49396 
// 4x4 scalar fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m swept over [1, 4],
// iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
49415 
// 4x4 scalar fmagic kernel: m = 4, n = 4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
49430 
// 4x4 scalar fmagic kernel: k in {1, 3, 5} crossed with every (m, n), m in [1, 4] and
// n in [1, 4], ks set to 3, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
49450 
// 4x4 scalar fmagic kernel: n in {5, 6, 7}, k in {1, 3, 5}, m = 4, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
49467 
// 4x4 scalar fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m = 4, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
49484 
// 4x4 scalar fmagic kernel: every (m, n) with m in [1, 4] and n in [1, 4],
// k in {1, 3, 5}, cm_stride set to 7, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
49504 
// 4x4 scalar fmagic kernel: m = 4, n = 4, k in {1, 3, 5}, ks set to 3 and
// a_offset set to 23.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
49520 
// 4x4 scalar fmagic kernel: m = 4, n = 4, k in {1, 3, 5}, ks = 3, a_offset = 23,
// repeated with zero_index set to each of 0..3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
49539 
// 4x4 scalar fmagic kernel: single run with m = 4, n = 4, k = 1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
49552 
// 4x4 scalar fmagic kernel: single run with m = 4, n = 4, k = 1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
49565 
// 4x4 scalar fmagic kernel: single run with m = 4, n = 4, k = 1 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
49578 
49579 
// 4x4 scalar lrintf kernel: single run with m = 4, n = 4, k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
49591 
// 4x4 scalar lrintf kernel: single run with m = 4, n = 4, k = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
49604 
// 4x4 scalar lrintf kernel: every (m, n) with m in [1, 4] and n in [1, 4] at k = 1,
// iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
49621 
// 4x4 scalar lrintf kernel: m swept over [1, 4] with n = 4 and k = 1, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
49636 
// 4x4 scalar lrintf kernel: n swept over [1, 4] with m = 4 and k = 1, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
49651 
// Full 4x4 shape with k sweeping 2..9.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t cur_k = 2; cur_k <= 9; ++cur_k) {
    auto tester = GemmMicrokernelTester();
    // Tile parameters handed to the tester for this microkernel.
    tester.mr(4).nr(4).kr(1).sr(1);
    // Shape for this pass.
    tester.m(4).n(4).k(cur_k);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
49665 
// k sweeps 2..9 while m and n each sweep 1..4; one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k <= 9; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n < 5; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m < 5; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        // Tile parameters handed to the tester for this microkernel.
        tester.mr(4).nr(4).kr(1).sr(1);
        // Shape for this pass.
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
49684 
// n sweeps 5..7 (above nr = 4) with m = 4, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      auto tester = GemmMicrokernelTester();
      // Tile parameters handed to the tester for this microkernel.
      tester.mr(4).nr(4).kr(1).sr(1);
      // Shape for this pass.
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
49700 
// n sweeps 5..7 with m = 4 and cn_stride set to 7, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      auto tester = GemmMicrokernelTester();
      // Tile parameters handed to the tester for this microkernel.
      tester.mr(4).nr(4).kr(1).sr(1);
      // Shape for this pass, plus the cn_stride override.
      tester.m(4).n(cur_n).k(cur_k).cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
49717 
// n sweeps 5..7 and m sweeps 1..4, for k in {1, 3, 5}; one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m < 5; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        // Tile parameters handed to the tester for this microkernel.
        tester.mr(4).nr(4).kr(1).sr(1);
        // Shape for this pass.
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
49736 
// n takes the values 8 and 12 (multiples of nr = 4) with m = 4, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t cur_n = 8; cur_n < 13; cur_n += 4) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      auto tester = GemmMicrokernelTester();
      // Tile parameters handed to the tester for this microkernel.
      tester.mr(4).nr(4).kr(1).sr(1);
      // Shape for this pass.
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
49752 
// n takes the values 8 and 12 with m = 4 and cn_stride set to 7, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t cur_n = 8; cur_n < 13; cur_n += 4) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      auto tester = GemmMicrokernelTester();
      // Tile parameters handed to the tester for this microkernel.
      tester.mr(4).nr(4).kr(1).sr(1);
      // Shape for this pass, plus the cn_stride override.
      tester.m(4).n(cur_n).k(cur_k).cn_stride(7);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
49769 
// n takes the values 8 and 12 while m sweeps 1..4, for k in {1, 3, 5};
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t cur_n = 8; cur_n < 13; cur_n += 4) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m < 5; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        // Tile parameters handed to the tester for this microkernel.
        tester.mr(4).nr(4).kr(1).sr(1);
        // Shape for this pass.
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
49788 
// Full 4x4 shape with ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
  for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
    auto tester = GemmMicrokernelTester();
    // Tile parameters handed to the tester for this microkernel.
    tester.mr(4).nr(4).kr(1).sr(1);
    // Shape for this pass, plus the ks setting.
    tester.m(4).n(4).k(cur_k).ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
49803 
// ks set to 3 while m and n each sweep 1..4, for k in {1, 3, 5};
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n < 5; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m < 5; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        // Tile parameters handed to the tester for this microkernel.
        tester.mr(4).nr(4).kr(1).sr(1);
        // Shape for this pass, plus the ks setting.
        tester.m(cur_m).n(cur_n).k(cur_k).ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
49823 
// n sweeps 5..7 with m = 4 and ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t cur_n = 5; cur_n <= 7; ++cur_n) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      auto tester = GemmMicrokernelTester();
      // Tile parameters handed to the tester for this microkernel.
      tester.mr(4).nr(4).kr(1).sr(1);
      // Shape for this pass, plus the ks setting.
      tester.m(4).n(cur_n).k(cur_k).ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
49840 
// n takes the values 8 and 12 with m = 4 and ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t cur_n = 8; cur_n < 13; cur_n += 4) {
    for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
      auto tester = GemmMicrokernelTester();
      // Tile parameters handed to the tester for this microkernel.
      tester.mr(4).nr(4).kr(1).sr(1);
      // Shape for this pass, plus the ks setting.
      tester.m(4).n(cur_n).k(cur_k).ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
49857 
// cm_stride set to 7 while m and n each sweep 1..4, for k in {1, 3, 5};
// one tester iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n < 5; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m < 5; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        // Tile parameters handed to the tester for this microkernel.
        tester.mr(4).nr(4).kr(1).sr(1);
        // Shape for this pass, plus the cm_stride override.
        tester.m(cur_m).n(cur_n).k(cur_k).cm_stride(7).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
49877 
// Full 4x4 shape with ks set to 3 and a_offset set to 23, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
  for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
    auto tester = GemmMicrokernelTester();
    // Tile parameters handed to the tester for this microkernel.
    tester.mr(4).nr(4).kr(1).sr(1);
    // Shape for this pass, plus the ks and a_offset settings.
    tester.m(4).n(4).k(cur_k).ks(3).a_offset(23);
    tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
49893 
// Same setup as the a_offset case (ks = 3, a_offset = 23), additionally
// sweeping zero_index over 0..3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
  for (size_t cur_k = 1; cur_k < 6; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz <= 3; ++cur_mz) {
      auto tester = GemmMicrokernelTester();
      // Tile parameters handed to the tester for this microkernel.
      tester.mr(4).nr(4).kr(1).sr(1);
      // Shape for this pass, plus the ks, a_offset and zero_index settings.
      tester.m(4).n(4).k(cur_k).ks(3).a_offset(23).zero_index(cur_mz);
      tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
49912 
// Single 4x4 run with k = 1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
  auto tester = GemmMicrokernelTester();
  // Tile parameters handed to the tester for this microkernel.
  tester.mr(4).nr(4).kr(1).sr(1);
  // Shape, plus the qmin setting.
  tester.m(4).n(4).k(1).qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
    xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
49925 
// Single 4x4 run with k = 1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
  auto tester = GemmMicrokernelTester();
  // Tile parameters handed to the tester for this microkernel.
  tester.mr(4).nr(4).kr(1).sr(1);
  // Shape, plus the qmax setting.
  tester.m(4).n(4).k(1).qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
    xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
49938 
// Single 4x4 run with k = 1 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
  auto tester = GemmMicrokernelTester();
  // Tile parameters handed to the tester for this microkernel.
  tester.mr(4).nr(4).kr(1).sr(1);
  // Shape, plus the cm_stride override.
  tester.m(4).n(4).k(1).cm_stride(7);
  tester.Test(
    xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
    xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
49951